
    iT             -       ~   d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlmZ d dlmZmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z5 d dl6m7Z7m8Z8 d dl9m:Z;  e	d      Z< ed      Z=ej|                  j~                  Z?ej                  j                  ddd      ZB eCd      \  ZDZEZFd ZGd ZHdeee=e<f   gee=e<f   f   fdZIde!fdZJd ZKd ZL eIe?j                  e?j                  g       e/       dddej                  ddfd               ZP eIe?j                  j                  e?j                  j                  g       e/       d!               ZT eIe?j                  j                  e?j                  j                  g       e/       d"d#d$              ZU eIe?j                         e/       d%               ZV eIe?j                  j                  e?j                  j                  e?j                  j                  e?j                  j                  g       e/d&d'      d(               ZY eIe?j                  j                  e?j                  j                  g       e/       d)               ZZd* Z[d=d+ed,e\e]   d-e^fd.Z_ eIe?j                  j                  e?j                  j                  g       e/       d/               ZadZbd,e\e]   fd0Zc eIe?j                  j                  e?j                  j                  g       e/       d1               Ze eIe?j                  j                        dd2d3       Zh eIe?j                  j                        ej                  dddd4d5       Zj eIe?j                  j                  e?j                  j                  g       e/       ej                  dddd4d6              Zl eIe?j                  j                  e?j                  j                  g       e/       ej                  dddd4d7              Zo eIe?j                  j                  e?j                  j                  g       e/       ddddd4d8              Zq eIe?j                  j                  e?j                  j                  g       e/       d+ed,e\e]   d9e]d:e]fd;              Zs eIe?j                  j                        d=d<       Zud= Zv eIe?j                  j                        d>        Zx eIe?j                        	 	 	 d>d?ed@edAedBedz  dCezdz  dDej                  dz  fdE       Z| eIe?j                        	 d?dFedGedHedDej                  dz  fdI       Z~ eIe?j                        dJdJddKd?edFedGedHedDej                  dz  f
dL       Z eIe?j                        	 	 	 	 	 	 	 d@dMej$                  dNej$                  dBedz  dOedz  dDej                  dz  dPe^dQe]dRe]dSe]fdT       Z eIe?j                  j                        dUdVd+ed,e]dWedXej$                  dYezdZe^defd[       Z eIe?j
                  j                        dUdVd+ed,e]dWedXej$                  dYezdZe^defd\       Z e/        eIe?j                  j                        d]               Z eIe?j                  j                        dddd ddd^d_edYezd`edz  d'edz  daedz  dbe]dce^defdd       Z eIe?j                  j                  e?j                  j                  g       e/       de               Z eIe?j                  j                        d=df       Z eIe?j                   j                  e?j                   j                  g       e/       dg               Z eIe?j                   j                        d=dh       Z eIe?j&                  j                        di        Z eIe?j&                  j                        dj        Z eIe?j,                  j                        dk        Z eIe?j,                  j0                        dl        Z eIe?j4                  j                        dm        Z eIe?j8                  j                        ddddddndo       Z eIe?j<                  j                        dAdp       Z eIe?j>                  j                        d>dq       Z eIe?jB                  j                        dAdr       Z eIe?jD                  j                        ds        Z eIe?jH                  j0                        dt        Zd+eduezfdvZd+edwedxezfdyZ	 dBdzeduezd{e^fd|ZdCdweduezd}ezfd~Zdwedede^duezfdZ	 dDdezded?edezfdZdezfdZ eIe?jZ                  j                  e?jZ                  j\                  g       e/dd      dEdwedezde^fd              Z eIe?j`                  j                  e?jb                  j                  g       e/       d?edefd              Z eIe?jf                  g       e/dd      d?efd              ZdedefdZ eIe?jl                         e/       d+edwede^defd              Z eIe?jn                         e/       d=d+edwede^defd              Z eIe?jp                         e/       d=d+ede^defd              Z eIe?jr                         e/       d=d+ede^defd              Z eIe?jt                  j                        dFdwede^de^fd       Z eIe?jv                  j                  e?jv                  j                  g       e/       d?ededefd              Z eIe?jx                  j                        d=dwede^fd       Z eIe?j|                  j                  e?j|                  j                  g       e/ddd      dddd+ede^de^deeeef   fd              Z eIe?j                  j                  e?j                  j                  g       e/       dddededede^def
d              Z eIe?j                  j                  e?j                  j                  g       e/ddd      dUddwede^deeeef   fd              Z eIe?j                  j                  e?j                  j                  g       e/ddd      dUdddwede^de^deeeef   fd              Z eIe?j                  j                  e?j                  j                  g       e/       dUdddededede^de^defd              Z eIe?j                         e/ddd      	 	 dGdedede^de^deeeef   f
d              Zdezdee^e^f   fdZ eIe?j                  j                  e?j                  j                  g       e/dd      dHdwedezdeeef   fd              Z eIe?j                  j                  e?j                  j                  g       e/dddd      dwedeeeeef   fd              Z eIe?j                  j                        	 	 	 dIdwede^de^dezdz  fd       Zdededee\e]   e\e]   f   fdZdededxezdz  deeef   fdZd?edede^fdZ eIe?j                        dUddddddÜdwedede^de^dedz  dedz  dedz  dedz  deeeeef   fdĄ       Z eIe?j                  j                  e?j                  j                  g      dUdddŜdwedede^de^de^dedz  defdȄ       Z eIe?j                         e/dddU˫      	 	 	 dJd+edwede^de^de^deeef   fd̈́              Z eIe?j                  j                        d΄        Z eIe?j                         e/       	 	 dKd?ededede^de^defdτ              ZdЄ Zdф Z eIe?j                         e/       d҄               Z eIe?j                         e/       dӄ               ZdԄ Z eIe?j                         e/dի      dք               Z eIe?j                         e/dի      dׄ               Zd؄ Z eIe?j                         e/       dل               Z eIe?j                         e/       dڄ               Z eIe?j                  j                        dۄ        Z eIe?j                  j                  e?j                  j                  e?j                  j                  e?j                  j                  g       e/dի      d܄               Zd݄ Z eIe?j                         e/       dބ               Z eIe?j                         e/       d߄               Z eIe?j                  j                  e?j                  j                  e?j                  j                  e?j                  j                  g       e/dի      d               Z eIe?j                         e/       dLd+ededefd              Z eIe?j                         e/       ded+edededef
d              Z eIe?j                   j                  e?j                   j                  g       e/dU˫      dJdJdd              Z eIe?j                  j                  e?j                  j                  g       e/       dd2d              Z eIe?j                  j                        dMd       Z eIe?j                  j                        dMd       Z eIe?j                  j                  e?j                  j                  g       e/       d?d              Z	 eIe?j                  j                        	 	 dFd       Z eIe?j                         e/dU˫      d?dDej                  dz  fd              Zd ZdNdZ	 d?dej$                  d@ej$                  de\e]   e]z  de\e]   e]z  de\e]   e]z  de^de]de\e]   e]z  dz  fdZd Z eIe?j$                  j                        dej$                  d@ej$                  dBej$                  dz  dej$                  dz  dej$                  dz  de^dedefd       Z eIe?j(                  j                        dej$                  d@ej$                  dBej$                  de\e]   de\e]   de\e]   de^de\e]   de]fd       Zej,                  j.                  rej                  j                  d dd      Z eIej|                  j2                  j4                  j                        d        Z eIej|                  j2                  j8                  j                        d        Zej,                  j<                  rOej                  j                  ddd      Z eIej|                  j@                  jB                        d        Z"ej                  j                  ddd      Z# eIej|                  jH                  jJ                  j                         eIej|                  jH                  jL                  j                         eIej|                  jH                  jL                  jN                        d                      Z( eIej|                  jH                  jJ                  jR                         eIej|                  jH                  jJ                  jT                        d               Z+ eIej|                  jH                  jX                  j                         eIej|                  jH                  jX                  jN                        d               Z- eIej|                  jH                  jX                  jR                         eIej|                  jH                  jX                  jT                        d	               Z. eIej|                  jH                  j^                  j                         eIej|                  jH                  j`                  j                        d
               Z1ej                  j                  ddd      Z2 eIej|                  jf                  jh                        	 	 	 	 dOd       Z5 eIej|                  jf                  jl                        d        Z7d Z8 eIe?jr                  j                        	 	 	 	 	 dPd       Z:d Z; eIe?jx                  j                        d        Z= eIe?j|                         e/       	 	 	 	 	 dPd              Z? eIe?j                         e/dի      d               ZA eIe?j                  j                        d        ZC eIe?j                  j                        d        ZE eIe?j                  j                        d        ZG eIe?j                         e/dի      d               ZIded}ezfdZJ eIe?j                         e/dd'      d               ZL eIe?j                         e/dի      d               ZN eIe?j                         e/dd'      d               ZP eIe?j                         e/dի      d               ZR eIe?j                  j$                        d?d        ZT eIe?j                  j                  e?j                  j                  g       e/       d!               ZV eIe?j                  j                  e?j                  j                  g       e/       d"d"d#e]fd$              ZW eIej|                  j~                  j                  j                  ej|                  j~                  j                  j                  g       e/       d%               ZX eIe?j                  j$                  e?j                  j$                  g      d&        Z[ eIe?j                  j                  g      d'        Z] eIe?j                  j                  e?j                  j                  g       e/dU˫      dJdJdd(              Z_ eIe?j                  j$                  g      d)        Za eIe?j                  j                  e?j                  j                  g      ddd*d+       Zd eIe?j                  j                  g      ddd*d,       Zf eIe?j                  g       e/       d-               Zh eIe?j                  g      d.        Zj eIe?j                  g      d/        Zl eIe?j                  g      d0        Zn eIe?j                  g      d1        Zp eIe?j                  g      d2        Zqd3e]d4e]de]fd5Zrd6 Zs eIe?j                  g      dBedz  fd7       Zu eIe?j                  g      d8        Zw eIe?j                  g      d9        Zy eIe?j                  j                        d:        Z{ eIe?j                         e/       d;               Z} eIe?j                  j                        	 	 	 	 	 	 dQd<       Z eIe?j                   j                        d=        ZdBd>Z eIe?j                  j                  e?j                  j                  g       e/       dRdd?d@              Z eIe?j
                  j                  e?j                  j                  g      dA        Z eIe?j
                  j                  e?j
                  j                  e?j                  j                  e?j                  j                  e?j                  j                  e?j                  j                  g       e/d&d'      dSdB              Z eIe?j                  j                        dC        Z eIe?j                  j                        dD        Z eIe?j                   j                        dE        Z eIe?j$                  j&                  e?j(                  j&                  e?j$                  j$                  e?j(                  j$                  e?j*                  j                  e?j,                  j                  e?j.                  j                  g      dF        Z eIe?j2                  j&                  e?j4                  j&                  e?j2                  j$                  e?j4                  j$                  g      ddG       Z eIe?j8                  j&                  e?j:                  j&                  g      ddH       Z eIe?j>                  j                  e?j>                  j@                  g      dI        ZdJ Z eIe?jF                  j$                  e?jF                  j&                  g      dK        Z eIe?jJ                  j$                  e?jJ                  j&                  g      dL        Z eIe?jN                  j                        dM        Z eIe?jR                  j$                  e?jR                  j&                  g      dN        Z eIe?jV                  j$                  e?jV                  j&                  g      dO        Z eIe?jZ                  j                        dP        Z eIe?j^                  j$                         e/       ddefdQ              Z eIe?jb                  g       e/       	 dTdR              Z eIe?jf                  g      	 dTdS       Z eIe?jj                  g      	 dTdT       Z eIe?jn                  j                  e?jp                  j                  g      d=dU       Z eIe?jt                  j&                        dV        Z eIe?jx                  j                        dW        Z eIe?j|                        dX        Z eIe?j                         e/       dY               Z eIe?j                        dZ        Z eIe?j                  j                        d=d[       ZŐdAd\Z eIe?j                  j                        d]        Z eIe?j                  j                        d^        Zɐd_ Zʐd` Zːda Z̐db Z	 d=d?edce]dde]dee]dfe]dge]dhe]die]dje]dke]dle]dme]dne]doe]dpe]dqe]dre]dse]dte]due]dezdve^f,dwZΐdx Zd?ededce]dde]dee]dfe]dge]dhe]die]dje]dke]dle]dpe]dqe]dre]dse]dte]due]dezf&dyZАdz Z eIe?j                  j                        d{        Z eIe?j                  j                        	 	 	 	 dOd|       Z eIe?j                  j                        d}        Z eIe?j                         e/dd'      	 	 	 	 dOd~              Z eIe?j                         e/dի      d               Zd?edefdZ G d de      Zd?edede]fdZ eIe?j                  j                        d        Z eIe?j                         e/       d               Z eIe?j                         e/dՐd      d               Z eIe?j                  j                  g      d        Z eIe?j                  j                        	 	 	 	 	 dUd       Z eIe?j                  j                  e?j                  j                  g       e/       ddddddd              Z eIe?j                  j                  e?j                  j                  g       e/       ddddddd              Z eIe?j                  j                        d        Z eIe?j                  j                        dVd       ZdBd,e]de]de^fdZd Zd Z eIe?j                  j                        d=d       Zd=dZd?dZd Zd?dZdWdZ eIe?j                  j                        d        Z eIe?j                        d        Z eIe?j                  j                  e?j                  j                  e?j                  j                  e?j                  j                  g       e/       d?d              Z  eIe?j                  j                  e?j                  j                  e?j                  j                  e?j                  j                  g      d?d       Z eIe?j                  j                  g      	 	 	 	 dXdedededede^de^dedz  fd       Z eIe?j                  jf                  g      	 	 	 	 dXdededededz  dedz  dedz  dede^de^dedz  fd       Zdedee]df   fdZ eIe?j                  g      	 	 	 	 dXdededededz  de^dede^de^dedz  fd       Z eIe?j                  g      	 	 	 	 	 dYdededededz  dede^de^dedz  fd       Z
 eIe?j                  g      	 d?dedededededededede]de]dede^dedededz  fd       Z eIe?j                  g      	 	 	 	 dZdedededede^dedz  dedz  fd       Z eIe?j                  g      	 	 dAdededededededede^dedz  dedz  fd       Z eIe?j"                  g      	 	 	 	 	 d[dededededz  dede^dedz  dedz  deeef   fd       Z eIe?j&                  g      	 	 	 d\dededededz  de^de^dedz  fd       Z eIe?j*                  g      	 	 d]dedededededz  dededededede\e^   de^dedz  fdÄ       Z eIe?j.                  g      	 d?dededededededededededede]de]dede^dedz  f dĄ       Z eIe?j2                  j                  g      	 	 	 	 	 dUdededededz  dedz  de]de]dede^de^dedz  de]dz  de]dz  dedz  dedz  fdɄ       Z eIe?j2                  jf                  g      	 	 	 	 	 dUdededededz  dedz  de]de]dede^de^dedz  dedz  dedz  dedz  de]dz  de]dz  dedz  dedz  f$dʄ       Z eIe?j8                  g      	 	 	 d>dedededededededede]de]dede^dedededz  de]dz  de]dz  f"d˄       Z eIe?j<                  g      	 	 	 	 	 d^dedededBedz  dedz  dedz  de]dz  de]dz  dede]de^dedz  dedz  dedz  de]dz  fdԄ       Z eIe?j@                  g      	 	 	 dWdededededBedz  dedz  dedz  dejB                  dejB                  dedededede]de^dedz  de]dz  de^f$d؄       Z"	 	 	 	 d_d+ej$                  dHej$                  dej$                  dej$                  dBej$                  dz  dej$                  dz  dDej                  dz  de^fd݄Z# eIe?jH                  j                  g      	 	 	 	 d_d+ej$                  dHej$                  dej$                  dej$                  dBej$                  dz  dej$                  dz  dDej                  dz  de^fdބ       Z%	 	 	 	 	 d`d+ej$                  dHej$                  de\ej$                     de\e7   de\ej$                     de\e7   dBej$                  dz  dDej                  dz  de\e8   dz  de\e8   dz  de^fdZ& eIe?jN                  j                  g      	 	 	 	 d_d+ej$                  dHej$                  de\ej$                     de\e7   de\e8   de\ej$                     de\e7   de\e8   dBej$                  dz  dej                  dz  de\e]   dz  de^fd       Z( eIe?jR                  jT                  e?jR                  jV                  g       e/       dBd              Z, eIe?jZ                  jT                        dBd       Z. eIe?j^                  j                  e?j^                  j                  g       e/       d=dd2d              Z0d Z1d Z2 eIe?jf                  j                  e?jh                  j                  g      d?d       Z3 eIe?jj                  j                  e?jl                  j                  g      dAd       Z5 eIe?jn                  j                  e?jp                  j                  g      	 	 dAdedee]ejB                  z     dee]ejB                  z     dedz  dedz  f
d       Z7 eIe?jr                  j                  e?jt                  j                  g      d>d       Z9 eIe?jv                  j                  e?jv                  jx                  e?jv                  j                  e?jv                  jz                  g      dad       Z>d Z? eIe?j                  j                        	 	 dAd       ZA eIe?j                  j                        d        ZB eIe?j                  j                        d        ZC eIe?j                  j                        d        ZDd ZEd ZF eIe?j                  j                  e?j                  j                  g      dRd       ZI eIe?j                  j                        dbd       ZJ eIe?j                  j                        dcd       ZL eIe?j                         e/       	 ddd              ZN eIe?j                  j                  e?j                  j                  g       e/d&d'      dSd               ZPej                  ZRd ZS eIe?j                  j                        d        ZT eIe?j                  j                        d        ZU eIe?j                  j                        d        ZW eIe?j                  j                        d        ZX eIe?j                  j$                  e?j                  j                  g       e/       dddd              Z[ eIe?j                  j&                  e?j                  j                  g      dddd+e&ded	e^d
e^fd       Z] eIe?j                  g       e/       ded              Z_ eIe?j                  j                  e?j                  j                  g      	 	 dAd       Zb eIe?j                  j                  g      	 	 dAd       Zd eIe?j                  j                        d        Ze eIe?j                  j                  e?j                  j                  g       e/       d>d              Zf eIej|                  j~                  j                        d        Zg eIej|                  j~                  j                        d        Zh eIe?j                         e/       dddddd              Zjd Zk eIe?j                        d        Zm eIe?j                        	 dfd       Zo eIe?j                        	 dfd       Zq eIe?j                        	 dfd       Zs eIe?j                         e/       dddd              Zu eIe?j                         e/       de]d+edefd              Zw eIe?j                        d+efd       Zy eIe?j                         e/dU˫      d+edefd              Zz eIe?j                         e/       d+edefd               Z{d! Z|	 	 	 	 	 d`d"ed#edej$                  dz  dej$                  dz  d$edz  dBedz  dej$                  dz  dDej                  dz  de^fd%Z} eIe?j                         e/       	 	 	 d>d"ed#ed$edz  dBedz  dDej                  dz  defd&              Z eIe?j                   g      	 	 	 	 	 d`d"ej$                  d#ej$                  dej$                  dej$                  d$ej$                  dz  dBej$                  dz  dej$                  dz  dDej                  dz  de^fd'       Z eIe?j                  j&                        dgd(       Z eIe?j                         e/       d)ed,e]d*e^defd+              Z eIe?j                         e/       dd,              Z eIe?j                         e/       	 	 	 dhd@ed'ed-e]d.e^d/e^defd0              Z eIe?j                  j                        	 did&edae\e   d1e\e]   d2efd3       Zd4 Z eIe?j                  j                        	 	 	 	 djd5       Zd6 Z ee?j                          ee?j                           ee?j"                          ee?j$                          ee?j&                          ee?j(                          ee?j*                          ee?j,                          ee?j.                          ee?j0                          ee?j2                          ee?j4                          ee?j6                          ee?j8                          ee?j:                          ee?j<                          ee?j>                          ee?j@                          ee?jB                          ee?jD                          ee?jF                         d7 Z eIe?jJ                         e/       d8               Z eIe?jL                         e/       dJd9d:              Z eIe?jN                         e/       dJd9d;              Z ee?jJ                        Z ee?jL                        Z ee?jN                        Zd dl0Zd dlZd dlZd< Z e        y(k      N)CallableSequence)Enum)wraps)TypeVar)	ParamSpec)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikemake_contiguous_strides_forNumber
NumberTypesuggest_memory_formatsym_min
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_config)ScalingTypeSwizzleType)_pytree_T_PatenIMPLMeta   c                     | |z   dz
  |z  S N    abs     j/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/torch/_meta_registrations.pyceil_divr9   9   s    EAI!    c                     | |z   dz
  |z  |z  S )z$Rounds up x to nearest multiple of yr3   r4   xys     r8   round_upr?   =   s    UQY1!!r:   returnc                       fd}|S )Nc                 V     t                 fd}t        j                  |        S )Nc                 (    t        t        |        y N)r   r   )opfns    r8   registerz0register_meta.<locals>.wrapper.<locals>.registerF   s    
B3r:   )r   pytree	tree_map_)rF   rG   rE   s   ` r8   wrapperzregister_meta.<locals>.wrapperC   s)     $	4 	2&	r:   r4   )rE   rJ   s   ` r8   register_metarK   B   s     Nr:   type_promotionc                     t        j                  |d| i\  }}|D cg c]  }t        ||       }}t        | }t	        |dt
        j                  iS c c}w )Ntype_promotion_kindrL   )utilsr   r    r&   r   r   DEFAULT)rL   args_result_dtyper=   s        r8   elementwise_metarT   O   ss    
 ..	*OA| ?CC#A|4CDC T"D "	BJJ  Ds   Ac                     t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  i}|j                  | |       S rD   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexs     r8   toRealValueTyper`   c   sE    ekku||L
 E5))r:   c                 l     t        t         g|       t        j                   k(   fd       y )Nc                      d d  S )Nzoutput with shape z# doesn't match the broadcast shape r4   )broadcasted_shape
self_shapes   r8   <lambda>z)check_inplace_broadcast.<locals>.<lambda>p   s    $ZL0STeSfg r:   )tupler%   rV   _check)rd   
args_shaperc   s   ` @r8   check_inplace_broadcastri   l   s0    /
HZHI	LLZ'gr:   Fc	                    	 t         t        j                        r(t        j                   j	                         dk(  d        t        t        j                        r(t        j                  j	                         dk(  d        t        d  fD              rZt        j                  t        j                               		nFt        j                  t        j                        	fd       nxs t        j                         t        t        j                        st        dt                     t        j                  t        t               fd       t        t              st        dt                     t        j                  dk\  d	        t        j                  f|d
||      S )Nr   c                       yNz:linspace only supports 0-dimensional start and end tensorsr4   r4   r:   r8   re   z(meta_linspace_logspace.<locals>.<lambda>       r:   c                       yrl   r4   r4   r:   r8   re   z(meta_linspace_logspace.<locals>.<lambda>   rm   r:   c              3   <   K   | ]  }t        |t                y wrD   )
isinstancecomplex).0args     r8   	<genexpr>z)meta_linspace_logspace.<locals>.<genexpr>   s     
C:c7#
Cs   c                      d  d S )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r4   )default_complex_dtyper^   s   r8   re   z(meta_linspace_logspace.<locals>.<lambda>   s    56K5LLrsxryz r:   zdtype must be torch.dtype, got c                      dt              j                   dt               j                   dt              j                   dS )Nz4received an invalid combination of arguments - got (, ))type__name__)endstartstepss   r8   re   z(meta_linspace_logspace.<locals>.<lambda>   sD     u+r$s),,-RU0D0D/EQH r:   zsteps must be IntLike, got c                       y)Nz$number of steps must be non-negativer4   r4   r:   r8   re   z(meta_linspace_logspace.<locals>.<lambda>   rm   r:   metar^   layoutdevice
pin_memoryrequires_grad)rp   rV   r   rg   dimanyrO   r   get_default_dtypeis_complex_dtyper^   AssertionErrorrz   _check_typer   empty)
r}   r|   r~   baser^   r   r   r   r   rv   s
   ``` `    @r8   meta_linspace_logspacer   t   sn    %&IIK1P	
 #u||$GGINP	

 
CsE/B
CC % A A##%!
 =)ELL&&u-z
 2002eU[[)>tE{mLMM 
5'"	H
 eW%:4;-HII	LL!KL;;	# r:   c                    t        j                  j                  t         j                  k(  fd       t        j                  | j                         dk(  xr j                         dk7   d        | j                  j                        S )Nc                  "    d j                    S )Nz2take(): Expected a long tensor for index, but got r^   indexs   r8   re   zmeta_take.<locals>.<lambda>   s    DU[[MR r:   r   c                       y)Nz*take(): tried to take from an empty tensorr4   r4   r:   r8   re   zmeta_take.<locals>.<lambda>   rm   r:   )rV   rg   r^   long_check_indexnumel	new_emptyshape)selfr   s    `r8   	meta_taker      sm     
LLuzz!R
 
ZZ\Q55;;=A#56< >>%++&&r:   r   c                T     j                   }j                   }t        j                  ||k(  d        t        j                   j                        dk(  xr j                        dk(   fd       t	         j
                  j
                        } j                  |      S )Nc                       y)Nz=linalg.cross: inputs must have the same number of dimensions.r4   r4   r:   r8   re   zlinalg_cross.<locals>.<lambda>   rm   r:   r0   c                  V    d  dj                          dj                          S )Nzlinalg.cross: inputs dimension z must have length 3. Got  and size)r   otherr   s   r8   re   zlinalg_cross.<locals>.<lambda>   s6    -cU 399S>"%

3'8: r:   )ndimrV   rg   r   r%   r   r   )r   r   r   x_dy_d	out_shapes   ```   r8   linalg_crossr      s     ))C
**C	LLs
O 
LL		#!4

31 4	
 "$**ekk:I>>)$$r:   c                 |    t        | d       t        | d       t        j                  | t        j                        S )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrV   
empty_likecontiguous_formatr   s    r8   linalg_matrix_expr      s3     d/04!45D0G0GHHr:   valuesindicesc                 Z   t        j                  | j                  | j                  | j                        }t        j                  | j                  | j                  t         j
                        }| j                         dk7  r%| j                  dk7  rt        || j                         ||fS )Nr   r^   r   )	rV   r   r   r   r^   int64r   r   maybe_wrap_dim)r   r   r   r   s       r8   	cummaxminr      sp    
 [[DKKtzzJFkk$**T[[LGzz|qTYY!^sDII&7?r:   c                 x    t        || j                         t        j                  | t        j                        S Nr   )r   r   rV   r   r   )r   r   s     r8   logcumsumexpr      s+     3		"D0G0GHHr:   c                D   |j                   }t        |      }||z
  }t        t        |            }t        |      D 	cg c]  }	d }
}	|D ]  }d|
|<   	 g g }}|D ]*  }|
|   s|j	                  |       |j	                  |       , ||z   }t        |      }|j                         |d | }|j                  fdd       |||d  z   }|j                  |      }dgt        |j                  |d        z   }|j                  |      }|j                  d      }||d<   t        |      }t        t        |            D ]  }|||      ||dz   <    | j                  |t        j                         t        |      D 	cg c]  }	d }}	d}|dz
  }|dk\  r0|| j                  d      z  |||   <   ||||      z  }|dz  }|dk\  r0t        ||      D ]  }| j                  d||z
  z         |||   <   ! | j                  ||| j                                | S c c}	w c c}	w )	NFTc                     |    S rD   r4   )r=   self_stridess    r8   re   z_exec_fft.<locals>.<lambda>  s    <? r:   keyreverser   r   r3   r   )r   lenlistrangeappendstridesortpermuter   reshaper   resize_rV   r   as_strided_storage_offset)outr   	out_sizesr   forwardr   signal_ndim
batch_dimsdim_permuterR   is_transformed_dimdleftright	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr   s                          @r8   	_exec_fftr      sZ   99Dc(K#J uT{#K).t5A%55 % $1% b%D !!$KKNLLO	
 ,KD	I;;=L
jy
!CHH*DH9IJ//KLL%E D4JK 899MMM-(EAJ!M!]+3s8_ 5#,SV#4!a% 5KK!1H1HKI $Dk*1*K*KQA
q&&1CJJqM&AKN#yQ00	Q q& :t$ G&)jja*n1E&FKN#GOOI{C,>,>,@AJW 6@ +s   	H 	Hr   r   exclude_lastc                     t        |      }| j                         |d t        |      t        |      z
   j	                  fd       |S )Nc                     |    S rD   r4   )r   r   s    r8   re   z_sort_dims.<locals>.<lambda>0  s    l1o r:   )r   )r   r   r   intr   )r   r   r   sorted_dimsr   s       @r8   
_sort_dimsr   ,  sL    s)K;;=L6#k"S%667<<% =  r:   c                 
   t        j                  | j                  j                         |s| j	                         S t        | |      }| j                  | j                               }t        || | j                         ||      S )Nr   )	rV   rg   r^   
is_complexcloner   r   r   r   )r   r   normalizationr   r   r   s         r8   meta_fft_c2cr   7  sb     
LL&&'zz|T3'K
..
%CS$		['JJr:   c                 f    t        |       t        kD  st        |       dk\  r| d   dk(  r	| d   dk(  ryy)N   r   r3   FT)r   cufft_max_ndimr   s    r8   use_optimized_cufft_pathr   F  s3    
3x. SX]s1v{s1vQR{r:   c                 z   t        j                  | j                  j                         t	        | j                               }t	        |      }|d   }||   dz  dz   }t	        |      }|||<   |r|||<   t        |       dk(  st        |       dk(  rz| j                  |t        j                  | j                              }	| }
t        |       dk(  rt        |      rt        |	|
||d       nt        |      dk(  r|n|}t        |	|
||gd       t        |      dkD  r0| j                  |t        j                  | j                              }
|d d }|rx|
|	}
}	|
j                         |j                  fd	d
       t        t         t        |            }|t        |      |z
  d  }t        |	|
||d       |d t        |      |z
   }|rx|s:|	j                  |      ||   k7  r#|
j#                  |t         j$                         |
}	|	S | j                  |t        j                  | j                              S )Nr   r   r3   cudaxpur   Tr   c                     |    S rD   r4   )r   stridess    r8   re   zmeta_fft_r2c.<locals>.<lambda>t  s    '!* r:   r   r   )rV   rg   r^   is_floating_pointr   r   device_hintr   rO   r   r   r   r   r   r   minr   r   r   )r   r   r   onesidedinput_sizesr   last_dimlast_dim_halfsizeonesided_sizesoutputworking_tensortarget_sizesr   max_dims	last_dimsr   s                  @r8   meta_fft_r2cr  M  s>    
LL--.tyy{#K[!I2wH#H-2Q6+&N0N8/	(4F"k$&75&@ U>>tzzJ   
 t&+CC+HfnidK ),CA9>LfnlXJPTU3x!|!%U%F%Ftzz%R "0 "
 cr(K)7(//1  ,d !  ~s;/?@'K(88(C(EF	NNIt **GC,<x,GH  {{8$	((;;&&y@W@W&X' ~~U>>tzzJ  
 	
r:   )	generatorc                B    t        |t        j                  | g            S rD   )r!   rV   Size)nr  r   s      r8   meta_randpermr	    s    S%**aS/22r:   r^   r   r   r   c                6    t        j                  | ||||      S Nr
  rV   r   )r  r^   r   r   r   s        r8   meta_randperm_defaultr    s      ;;	vf r:   c                x     dt        j                   kD   fd       t        j                  |||||      S )Nr   c                      d d  S Nz:random_ expects 'from' to be less than 'to', but got from=z >= to=r4   highlows   r8   re   zmeta_randint.<locals>.<lambda>      LSEQXY]X^_ r:   r
  rV   rg   r   )r  r   r^   r   r   r   r  s   `     @r8   meta_randintr    s>     C	LLs
_ ;;E&J r:   c                t     t        j                   kD   fd       t        j                  |||||      S )Nc                      d d  S r  r4   r  s   r8   re   z"meta_randint_low.<locals>.<lambda>  r  r:   r
  r  )r  r  r   r^   r   r   r   s   ``     r8   meta_randint_lowr    s9     
LLs
_ ;;E&J r:   c                6    t        j                  | ||||      S r  r  )r   r^   r   r   r   s        r8   meta_rand_defaultr    s      ;;E&J r:   r   lastdimc                    t        j                  | j                  j                         t	        |       dk(  rt        | j                               }|||d   <   | j                  |t        | j                              }t        |      r.t        || j                  t         j                        ||d      S t        |      dkD  rt        | |d d d|      }n | j                  t         j                        }t        ||||d   gd      S | }t        |      dkD  r|d d }t        | ||d      }|dd  }t        |j                               }|||d   <   | j                  |t        | j                              }	t        |	|||d      S )	Nr   r   r   r   Fr   r3   r   )rV   rg   r^   r   r   r   r   r   r`   r   r   r   r   r   r   )
r   r   r   r  r   r   tempr   c2c_dimsr   s
             r8   meta_fft_c2rr!    sk    
LL&&'4F"%	$	#b'	1LM#C(

)@)@
A  3x!|#D#cr(Aw?zz0G0GzHVT9s2wiOO s8a<3BxH xNEbc(C&	$	#b'nnYodjj.InJeYUCCr:   c                 J   ddl m}  ||       s#t        j                  |       dk(  rt	        d      t        |t              ra|j                  | |      }| j                         |j                         k7  r.t        j                  j                  || j                                | S )Nr   )free_unbacked_symbolsr3   zQmore than one element of the written-to tensor refers to a single memory location)%torch.fx.experimental.symbolic_shapesr#  rV   _debug_has_internal_overlapRuntimeErrorrp   r   tor   r-   expand_copydefault)r   srcnon_blockingr#  intermediates        r8   
meta_copy_r-    s     L "$'E,M,Md,SWX,X_
 	
 #vvvdL199;,++--$$\499;?Kr:   c                     t        | j                               }t        | j                               }|| j                         k\  rdn
||   ||   z  }|j	                  |d       |j	                  ||       ||fS r2   )r   r   r   r   insert)tensorr   result_sizesresult_strides
new_strides        r8   inferUnsqueezeGeometryr4    sq    &L&--/*NVZZ\)|C/@>RUCV/VJQ#z*''r:   c                     t        || j                         dz         }t        | |      \  }}| j                  ||       | S r2   )r   r   r4  r   )r   r   g_sizes	g_stridess       r8   meta_unsqueeze_r8    s>    
dhhj1n
-C/c:GYWi(Kr:   r   weight_metabias_activation_opt	out_dtypec                 F   t        | j                        }|R|j                  d      |j                  d      k7  r/t        d|j                  d       d|j                  d             |j                  d      | j                  d      dz  k7  r2t        d|j                  d       d| j                  d      dz         |j                  d      |d<   t	        | j                        dk7  r"t        d	t	        | j                         d
      d| j                  d      f}|K| j
                  t        j                  k(  r|t        j                  k(  st        d| j
                   d|       | j                  ||| j
                  n|      j                  ||      }|S )Nr   z%output size mismatch: weight.size(0)= != bias.size(0)=r3   r   r   zweight.size(1)=z != input.size(-1)/2=z0we can only handle the squashed input case, got D inputzKout_dtype is only supported for i8i8->i32 linear operator, got input.dtype=, out_dtype=r   )r   r   r   r   r   r^   rV   int8int32r   
as_strided)	r   r9  r:  r;  r<  r=  output_sizestransposed_stridesr   s	            r8   meta_sparse_structured_linearrG  %  s    $L;;q>TYYq\) 7A7GGXY]YbYbcdYeXfg  {{1~B!++fkk!n--B5::b>TUCUBVW
 	
 {{1~L 5;;1>s5;;?O>PPWX
 	
 UZZ]+uzz)i5;;.F ]^c^i^i]jjv  xA  wB  C  __&.ekkI   j12 
 Mr:   mat1	mat1_metamat2c                 2   t        | j                        dk7  r"t        dt        | j                         d      t        |j                        dk7  r"t        dt        |j                         d      t        |j                        dk7  r"t        dt        |j                         d      | j                  d      |j                  d      dz  k7  r2t        d| j                  d       d	|j                  d      dz         | j                  d      |j                  d      g}|K|j                  t
        j                  k(  r|t
        j                  k(  st        d
|j                   d|       |j                  |||j                  n|      }|S )Nr   mat1 must be 2D, got Dmat1_meta must be 2D, got mat2 must be 2D, got r3   r   mat1.size(1)= != mat2.size(0)/2=Jout_dtype is only supported for i8i8->i32 linear operator, got mat2.dtype=rA  r   	r   r   r   r   r^   rV   rB  rC  r   )rH  rI  rJ  r=  rE  r   s         r8   meta_sparse_structured_mmrT  Q  sv    4::!4S_4EQGHH
9??q 9#ioo:N9OqQRR
4::!4S_4EQGHHyy|tyy|a''DIIaL>)<TYYq\A=M<NO
 	
 IIaL$))A,/L

ejj(Y%++-E \]a]g]g\hhtu~t  A  ^^%-djj9  F
 Mr:   r3   )alphabetar=  c                J   t        | j                        dk7  r"t        dt        | j                         d      t        |j                        dk7  r"t        dt        |j                         d      t        |j                        dk7  r"t        dt        |j                         d      t        |j                        dk7  r"t        dt        |j                         d      | j                  d	      |j                  d	      k7  r/t        d
| j                  d	       d|j                  d	             |j                  d      |j                  d	      dz  k7  r2t        d|j                  d       d|j                  d	      dz         |j                  d	      |j                  d      g}|K|j                  t
        j                  k(  r|t
        j                  k(  st        d|j                   d|       |j                  |||j                  n|      }|S )Nr3   zKonly input broadcasted to columns of mat1 * mat2 product is supported, got r@  r   rL  rM  rN  rO  r   zUonly input broadcasted to columns of mat1 * mat2 product is supported, input.size(0)=z != mat1.size(0)=rP  rQ  rR  rA  r   rS  )	r   rH  rI  rJ  rU  rV  r=  rE  r   s	            r8   meta_sparse_structured_addmmrX  q  s    5;;1YZ]^c^i^iZjYkkrs
 	
 4::!4S_4EQGHH
9??q 9#ioo:N9OqQRR
4::!4S_4EQGHHzz!}		!$"ZZ]O+<TYYq\NL
 	
 yy|tyy|a''DIIaL>)<TYYq\A=M<NO
 	
 IIaL$))A,/L

ejj(Y%++-E \]a]g]g\hhtu~t  A  ^^%-djj9  F
 Mr:   compressed_Adense_BrU  transpose_resultalg_idsplit_ksplit_k_modec	           	      *   |j                   t        j                  t        j                  t        j                  t        j
                  t        j                  hvrt        d|j                          | j                   |j                   k7  r%t        d| j                    d|j                          t        |j                        dk7  r"t        dt        |j                         d      | j                   t        j
                  t        j                  fv }	|	r|j                         rt        d      |j                  d      }
| j                  d	      }|4||j                  d	      k7  r t        d
| d|j                  d	             |k|	r@|t        j                  t        j                  t        j                  t        j                  hv s)t        d| j                    d|j                    d| d      |r|
|fn||
f}|j                  ||      S )NzA_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3, got z%inputs must have the same dtype, got r   r   z-_cslt_sparse_mm only supports 2d inputs, got rM  z.dense input must be transposed for 8bit dtypesr3   r   zbias size mismatch: m=r?  zout_dtype is not supported for z x z -> z matmul!r   )r^   rV   float32float16bfloat16rB  float8_e4m3fnr   r   r   is_contiguousr   rC  r   )rY  rZ  r;  rU  r=  r[  r\  r]  r^  is_8bit_input_typer  moutput_shapes                r8   meta__cslt_sparse_mmrh    s    }}

  OPWP]P]_
 	
 W]]*3L4F4F3GuW]]O\
 	
 7==Q;C<N;OqQ
 	
 &++

E<O<O/PP  " !QRRQA!A		! (+<TYYq\NK  ##	 !1,2D2D1ESW[\e[ffno  .Aq6Aq6L\;;r:   T)include_selfr   sourcereduceri  c                L    t        j                  | t         j                        S r   rV   r   r   r   r   r   rj  rk  ri  s         r8   meta_index_reducero    s     D0G0GHHr:   c                    | S rD   r4   rn  s         r8   meta_index_reduce_rq    s	     Kr:   c                     t        | j                               }| j                         dkD  r|j                         ||<   | j	                  |      S Nr   )r   r   r   r   r   )r   r   r   result_sizes       r8   meta_index_selectru    s@     tyy{#KxxzA~ ;;=C>>+&&r:   )lengthsr   offsetsaxisunsafeinitialdatarv  rw  rx  ry  c                     |t        d       fd}| ||j                        S |+|j                  d d |j                  d   dz
  fz   }	 ||	      S t        d      )Nz?segment_reduce(): indices based reduction is not supported yet.c                     t        j                  | j                  dz   d  z   j                  dt         j                        S )Nr3   r   r^   r   r   )rV   r   r   r^   r   )lengths_shaperx  r{  s    r8   segment_reduce_lengths_tensorz:meta_segment_reduce.<locals>.segment_reduce_lengths_tensor  s>    {{DJJtaxz22**11	
 	
r:   r   r3   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr   r&  )
r{  rk  rv  r   rw  rx  ry  rz  r  r  s
   `    `    r8   meta_segment_reducer    s|     !M
 	

 ,W]];; cr*gmmB.?!.C-EE,];;
U
VVr:   c                 $    | j                  d      S Nr4   r   r   s    r8   meta_maxr  %       >>"r:   c                     t        j                  | j                  |f      }t        | ||      }| j	                  |      | j	                  |t
        j                        fS Nr   rO   reduction_dimsr   _compute_reduction_shaper   rV   r   r   r   keepdimrg  s       r8   meta_max_dimr  +  R    


tzzC6
2C+D#w?L|$|5::6 r:   c                 $    | j                  d      S r  r  r   s    r8   meta_minr  5  r  r:   c                     t        j                  | j                  |f      }t        | ||      }| j	                  |      | j	                  |t
        j                        fS r  r  r  s       r8   meta_min_dimr  ;  r  r:   c                     | j                         rt        | j                        }nt        | t        j
                        \  }}t        j                  | |      S NrN   r   )r   r   r^   r   r   INT_TO_FLOATrV   r   )r   rS   rR   s      r8   
meta_angler  E  sI    /

;, ? L L
< D55r:   c                     t        j                  || j                         | j                         |j	                  t        j
                  |             S rD   )rV   _resize_output_r   r   copy_angle)r   r   s     r8   meta_angle_outr  Q  s6    	#tyy{DKK899U[[&''r:   c                      y rD   r4   )vals    r8   assert_asyncr  W      
r:   c                      y rD   r4   )r  
assert_msgs     r8   assert_async_metar  \  r  r:   c                      y rD   r4   )ss    r8   
print_metar  a  r  r:   r^   r   r   r   r   c                 0    t        j                  dd      S )Nr   r   r   r  r  s        r8   make_dep_tokenr  f  s     ;;q((r:   c                 h    ddl m} t        | t        t        f      rt        d       || ||       y )Nr   )constrain_range'Constraining SymFloat or Symbool is nyir   max)r$  r  rp   r
   r	   
ValueError)r   r   r  r  s       r8   sym_constrain_ranger  r  s/     F$7+,BCCDcs+r:   c                 6    t         j                  | ||       |S Nr  )r-   r  r   r   r  	dep_tokens       r8   functional_sym_constrain_ranger  |  s    Ts4r:   c                 .   ddl m} ||t        j                  | dk\         y t	        | t
        t        f      rt        d      t        |       t        u r5|t        j                  | |k\         |t        j                  | |k         y  || ||       y )Nr   )_constrain_range_for_sizer  r  )
r$  r  rV   rg   rp   r
   r	   r  rz   r   )r   r   r  r  s       r8   sym_constrain_range_for_sizer    s     P
{s{TQY$7+,BCCDzS?LL%?LL%d5r:   c                 6    t         j                  | ||       |S r  )r-   r  r  s       r8   'functional_sym_constrain_range_for_sizer    s    %%d%=r:   c                     |S rD   r4   )r  r  r  s      r8   functional_assert_async_metar    s    r:   f_namec                       j                         dk  rt         d j                                t        j                   j	                  d       j	                  d      k(   fd       y )Nr   z8: The input tensor must have at least 2 dimensions, got r   c                  V      dj                  d       dj                  d       dS )Nz5: A must be batches of square matrices, but they are r   by r   	 matricesr   )r  r   s   r8   re   z#squareCheckInputs.<locals>.<lambda>  s1    6( 		"d499R=/D r:   )r   r   rV   rg   r   )r   r  s   ``r8   r   r     s^    xxzA~hNtxxzl[
 	
 
LL		"2&	Dr:   Anamec                     t        j                   j                  j                  k(   fd       t        j                   j                  j                  k(   fd       t        j                  j	                  d      j	                  d      k(  fd       t        j                  j	                  d       j	                  d      k(   fd       y )Nc                  >    dj                    d j                    dS )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r  r  r   s   r8   re   z(linearSolveCheckInputs.<locals>.<lambda>  s%    H{{m:ahhZy: r:   c                  >    dj                    d j                    dS )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r  r   r  s   r8   re   z(linearSolveCheckInputs.<locals>.<lambda>  s%    Kzzl/!'')= r:   r   r  c                  R    d j                  d       d j                  d       dS )Nz3A must be batches of square matrices, but they are r  r  r   r  r   r  s   r8   re   z(linearSolveCheckInputs.<locals>.<lambda>  s0    FF2J<tAFF2J<yB r:   c                      d d j                  d       d j                  d       dj                  d       dj                  d       
S )NzIncompatible matrix sizes for z: each A matrix is r   r  z but each b matrix is r  r   )r  r  r   s   r8   re   z(linearSolveCheckInputs.<locals>.<lambda>  sR    ,TF 3D$TYYr]O4		"H r:   )rV   rg   r   r^   r   )r   r  r  s   ```r8   linearSolveCheckInputsr    s    	LLqxx	
 
LL

agg	
 
LL	r
affRj 	
 
LL	r
diim#	
r:   tallow_low_precision_dtypesc                 J   | j                   t        j                  | j                         xs | j	                         fd       |sYt        j                  t        j
                  t        j                  t        j                  t        j                  fv fd       y y )Nc                       d  S )Nz<: Expected a floating point or complex tensor as input. Got r4   r^   r  s   r8   re   z(checkFloatingOrComplex.<locals>.<lambda>  s    6(VW\V]^ r:   c                       d  S )Nz*: Low precision dtypes not supported. Got r4   r  s   r8   re   z(checkFloatingOrComplex.<locals>.<lambda>  s    vhHP r:   )	r^   rV   rg   r   r   rZ   r\   rY   r[   )r  r  r  r^   s    ` @r8   r   r     sn    
 GGE	LL	/^ &ekk5<<u}}MMP	
 &r:   arg_namec                 ^    t        j                  | j                         dk\  fd       y )Nr   c                       d  dS )Nz: The input tensor z! must have at least 2 dimensions.r4   )r  r  s   r8   re   zcheckIsMatrix.<locals>.<lambda>  s    6(-hZ7XY r:   )rV   rg   r   )r  r  r  s    ``r8   checkIsMatrixr    s    	LL	1Yr:   Br   c                      t                t               t        j                  r# j	                  d      j	                  d      k(  n" j	                  d      j	                  d      k(   fd       y )Nr  r   c                       drdnd d j                  d       d j                  d       dj                  d       dj                  d       d	S )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r  r=   r   r   ry   r   )r  r  r  r   s   r8   re   z#checkInputsSolver.<locals>.<lambda>  s[    hHxX.AaffRj\qvvbzl!AFF2J<qJ r:   )r   r  rV   rg   r   )r  r  r   r  s   ````r8   checkInputsSolverr    sY    a !V	LL$(r
affRj affRjAFF2J.F	
r:   resultfn_nameresult_namec                 r     t        j                  j                  j                  k(   fd       y )Nc            	      L      d d dj                    dj                    	S )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r  )r  r   r  r  s   r8   re   z!checkSameDevice.<locals>.<lambda>  s5    i{;-/dm4nU\\NL r:   )rV   rg   r   )r  r  r   r  s   ````r8   checkSameDevicer    s&     
LL%	
r:   UPLOc                       j                         }t        j                  t               dk(  xr |dk(  xs |dk(   fd       y )Nr3   ULc                      d  S )Nz1Expected UPLO argument to be 'L' or 'U', but got r4   )r  s   r8   re   zcheckUplo.<locals>.<lambda>  s    CD6J r:   )upperrV   rg   r   )r  UPLO_uppercases   ` r8   	checkUplor    s<    ZZ\N	LLD	QKNc1J^s5JJr:   eigenvalueseigenvectorsr  	compute_vc                 T   t        | d       t        |       t        | j                        }|r/| j	                  |      }|j                  |t        |d             n| j	                  dg      }|j                          | j	                  |t        | j                              }||fS )Nzlinalg.eighF	row_majorr   r   )
r   r  r   r   r   r   r   popr`   r^   )r  r  r  r   vecsvalss         r8   meta__linalg_eighr    s     a'dOME{{5! ;EU ST{{A3	IIK;;uOAGG$<;=D:r:   c                     t        | d       t        j                  | j                        r| j                  nt        j                  | j                        }| j                  | j                  d d |      S )Nzlinalg.eigvalsr   r   )r   rO   r   r^   r   r   r   )r   complex_dtypes     r8   meta__linalg_eigvalsr  &  sc     e-. !!%++. 	..u{{; 
 ??5;;s+=?AAr:   c                    t        | d       t        j                  | j                        r| j                  nt        j                  | j                        }| j                  | j                  d d |      }| j                  | j                  |      }t        |       dk(  }|j                  | j                  t        | j                  |             ||fS )Nz
linalg.eigr   r   r   r  )
r   rO   r   r^   r   r   r   r   r   r   )r   r  r   vectorsis_cudas        r8   meta_linalg_eigr  2  s     e\* !!%++. 	..u{{; 
 __U[["-]_CFooekko?G% F*G0P 7?r:   r*  c                 v    | j                   j                  t        j                        j	                  dd      S )Nr   r  r   )mTr   rV   r   	transpose)r*  s    r8   cloneBatchedColumnMajorr  D  s*    66<<e&=&=<>HHRPPr:   r  c                     t        |       S rD   )r  )r   r  r  s      r8   _cholesky_solve_helperr  H  s     #4((r:   c                      t        j                   j                  dk\   fd       t        j                  j                  dk\  fd       t         d      \  }}t	        |||      S )Nr   c                  $    d j                    dS )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r   s   r8   re   z cholesky_solve.<locals>.<lambda>S  s    ?		{J]^ r:   c                  $    d j                    dS )Nz-u should have at least 2 dimensions, but has r  r  r  s   r8   re   z cholesky_solve.<locals>.<lambda>W  s    ?xGZ[ r:   cholesky_solve)rV   rg   r   !_linalg_broadcast_batch_dims_namer  )r   r  r  self_broadcastedA_broadcasteds   ``   r8   r	  r	  N  sh     
LL		Q^ 
LL	![ 'Ha!'#m ""2M5IIr:   c                     | j                         dk(  r%t        j                  | t        j                        S t	        | d       t        |       S )Nr   r   cholesky)r   rV   r   legacy_contiguous_formatr   r  r   r  s     r8   r  r  _  s@     zz|qE4R4RSSdJ'"4((r:   c                 0    t        | d       t        |       S )Ncholesky_inverse)r   r  r  s     r8   r  r  h  s     d./"4((r:   check_errorsc                    t        | d       t        | d       | j                  }t        |      }t	        |d      }| j                  |      }|j                  ||       | j                  |d|dz
   t        j                        }||fS )Nzlinalg.choleskyFr   r   r   )	r   r   r   r   r   r   r   rV   rC  )r  r  r  A_shaper   	L_stridesr  infoss           r8   linalg_cholesky_exr  p  s    a*+1/0ggGw<D ,GU;I	GAMM'9% KKD1H-U[[KAEe8Or:   tauc                 @    t        j                   j                  dk\  d        t        j                   j                  d       j                  d      k\  d        t        j                   j                  d      j                  d      k\  d        t        j                   j                  j                  z
  dk(   fd        j                  dkD  r: j                  d d }j                  d d t        j                  |k(  fd	       t        j                  j
                   j
                  k(   fd
       t        d d       t        j                   j                  t         j                  d       j
                   j                        S )Nr   c                       y)NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r4   r4   r:   r8   re   z,linalg_householder_product.<locals>.<lambda>  rm   r:   r  r   c                       y)Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r4   r4   r:   r8   re   z,linalg_householder_product.<locals>.<lambda>  rm   r:   c                       y)Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r4   r4   r:   r8   re   z,linalg_householder_product.<locals>.<lambda>  rm   r:   r3   c                  <    dj                    d j                    S )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r  r   r  s   r8   re   z,linalg_householder_product.<locals>.<lambda>  '    )),
2Nuzzl\ r:   c                      d  S )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r4   actual_batch_tau_shapes   r8   re   z,linalg_householder_product.<locals>.<lambda>      66L5MO r:   c                  <    dj                    d j                    S )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype r   r   s   r8   re   z,linalg_householder_product.<locals>.<lambda>  s#    :399+*5;;-9 r:   z torch.linalg.householder_productr  Fr  r   r   r^   r   )
rV   rg   r   r   r   r^   r  empty_stridedr   r   )r   r  expected_batch_tau_shaper$  s   `` @r8   linalg_householder_productr*    sK   
 
LL

aZ 
LL

2%**R.(t 
LL

2#((2,&r
 
LL

SXX"	
 zzA~#(;;s#3 !$3B"&>>	
 
LL		U[[ 	
 6UEJ[[*5;;%Hkk||	 r:   c                 2   t        | d       t        | dd       | j                  | j                        }|j	                  | j                  t        | j                  d             | j                  | j                  d d t        j                        }||fS )Nzlinalg.inv_exF)r  r  r  r   r   r   r   r   r   r   rV   rC  )r  r  r  r  s       r8   linalg_inv_ex_metar-    sq    a)1o%P	AGGAMM!''6qww%PQKKEKKK8Ee8Or:   LDpivotsinfo)	hermitianr  r1  c                   t        | d       t        | d       t        j                  | j                  t        | j                  d      | j                  | j                        }| j                  | j                  d d t        j                        }| j                  | j                  d d t        j                        }|||fS )Nztorch.linalg.ldl_factor_exFr  r'  r   r   r  )
r   r   rV   r(  r   r   r^   r   r   r   )r   r1  r  r.  r/  r0  s         r8   linalg_ldl_factor_ex_metar3    s     d894!=>			ZZ*4::Gjj{{	
B ^^DJJsO599^=F>>$**Sb/>;Dvtr:   )r1  c                d    t         d       t         d       t         d       t        j                  j
                  dk\  fd        j                  d d }t        j                  |j                  k(  fd       t        j                  t        j                  j                        fd       t        j                   j                  j                  k(   fd       t               \  }}t        j                  |t        |d	      j                  j                  
      S )Nztorch.linalg.ldl_solver   c                  $    d j                    dS )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r  r  )r  s   r8   re   z'linalg_ldl_solve_meta.<locals>.<lambda>      &&!46 r:   r   c                  $    d j                    dS )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr   r/  s   r8   re   z'linalg_ldl_solve_meta.<locals>.<lambda>      ))/h@ r:   c                  "    d j                    S )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got r   r:  s   r8   re   z'linalg_ldl_solve_meta.<locals>.<lambda>  s    Nv||n] r:   c                  <    dj                    d j                    S )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r   )r  r.  s   r8   re   z'linalg_ldl_solve_meta.<locals>.<lambda>  s"    3BHH:=UVWV]V]U^_ r:   Fr  r'  )r   r   r  rV   rg   r   r   rO   is_integer_dtyper^   _linalg_broadcast_batch_dimsr(  r   r   )r.  r/  r  r1  expected_pivots_shapeB_broadcast_sizerR   s   ```    r8   linalg_ldl_solve_metarB    s     b232781b":;	LL	!	
 HHSbM	LL-	
 
LLv||,] 
LL
AGG_ 7q"=a*+;uMggxx	 r:   Pr  )pivotrD  c                h    t        j                   j                  dk\   fd       t         j                        }|d   }|d   }t        ||      }||d<   |r j                  |      }n j                  dg      }||d<    j                  |      }||d<   ||d<    j                  |      }|||fS )Nr   c                  $    d j                    dS )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: r8  r9  r  s   r8   re   z linalg_lu_meta.<locals>.<lambda>
  s    RSTSZSZR[[cd r:   r  r   r   )rV   rg   r   r   r   r   r   )	r  rD  sizesrf  r  krC  r  r  s	   `        r8   linalg_lu_metarI    s     
LL	!d
 MEb	Ab	A1AE"IKKKKE"I	EAE"IE"I	EAa7Nr:   LU)rD  r  c                    t        j                   j                  dk\   fd       t         j                        }|d   }|d   }t        j
                  |t        |d       j                   j                        }|j                          t        ||      |d<    j                  |t         j                        }|j                           j                  |t         j                        }|||fS )	Nr   c                  $    d j                    dS )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: r8  r9  r  s   r8   re   z*linalg_lu_factor_ex_meta.<locals>.<lambda>,  s    XYZY`Y`Xaaij r:   r  r   Fr  r'  r   )rV   rg   r   r   r   r(  r   r^   r   r  r   r   r   )	r  rD  r  rG  rf  r  rJ  r/  r0  s	   `        r8   linalg_lu_factor_ex_metarM  "  s     
LL	!j
 MEb	Ab	A			*5EBggxx	
B 
IIK1E"I[[eii[0F 
IIK;;uEII;.Dvtr:   )r   adjointrN  c                    t         d       t        j                   j                  j                  k(   fd       t        j                  j                  t        j                  k(  d        t         d       t         |d       t        j                   j                  d      j                  d      k(  d        t        j                   j                  d d j                  k(  fd       t               \  }}t        j                  |t        ||       j                  j                  	      }|j                         d
k7  r"|s |j                         r|j                         }|S )Nztorch.linalg.lu_solvec                  >    dj                    d j                    dS )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type r8  r   )r  rJ  s   r8   re   z&linalg_lu_solve_meta.<locals>.<lambda>U  s(    $$&HH:_QWWIXO r:   c                       y)NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r4   r4   r:   r8   re   z&linalg_lu_solve_meta.<locals>.<lambda>\  rm   r:   zlinalg.lu_solver   c                       y)NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr4   r4   r:   r8   re   z&linalg_lu_solve_meta.<locals>.<lambda>d  rm   r:   c                  $    d j                    dS )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape r8  r9  r:  s   r8   re   z&linalg_lu_solve_meta.<locals>.<lambda>j  r;  r:   r  r'  r   )r   rV   rg   r^   r   r   r  r   r   r?  r(  r   r   r   r   conj)rJ  r/  r  r   rN  rA  rR   r  s   ```     r8   linalg_lu_solve_metarV  G  s.    267	LL
AGG	
 
LL		!W b12b!T#45	LL
v{{2&k 
LL
"%	
 7q"=a  *+;4xPggxx	F ||~4[[]FMr:   unpack_dataunpack_pivotsc                     t        j                   j                  dk\   fd       |r2t        j                  |j                  t         j                  k(  d        t         j                        }|d   }|d   }t        ||      }||d<   |r j                  |      }n j                  dg      }|r2||d<    j                  |      }	||d<   ||d<    j                  |      }
n$ j                  dg      }	 j                  dg      }
||	|
fS )Nr   c                  $    d j                    dS )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: r8  r9  )rJ  s   r8   re   z lu_unpack_meta.<locals>.<lambda>  s    XY[YaYaXbbjk r:   c                       	 y)Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr4   r4   r:   r8   re   z lu_unpack_meta.<locals>.<lambda>  s    p r:   r  r   r   )	rV   rg   r   r^   rC  r   r   r   r   )rJ  r/  rW  rX  rG  rf  r  rH  rC  r  r  s   `          r8   lu_unpack_metar\    s     
LL
1k LLEKK'	
 NEb	Ab	AAq	AE"ILLLL!b	LLb	b	LLLL!LL!a7Nr:   modec                       dk(  rd}d}||fS  dk(  rd}d}||fS  dk(  rd}d}||fS t        j                  d fd       fS )NreducedTcompleteFrc                      d  dS )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r4   )r]  s   r8   re   z _parse_qr_mode.<locals>.<lambda>  s    1$ 8N O r:   rV   rg   )r]  	compute_qr_  s   `  r8   _parse_qr_modere    s    y	 g 
		 g 
	 g 		
 gr:   QRc                    t        | d       t        | d       t        |      \  }}| j                  d   }| j                  d   }t	        ||      }|rMt        | j                        }|r|n||d<   | j                  |      }|j                  |t        |d             n| j                  dg      }t        | j                        }	|s|s|n||	d<   | j                  |	      }
|
j                  |	t        |	d             ||
fS )Nz	linalg.qrr  r   Fr  r   )	r  r   re  r   r   r   r   r   r   )r  r]  rd  reduced_moderf  r  rH  Q_shaperf  R_shaperg  s              r8   linalg_qr_metarl    s     ![!1k*,T2I|	A	AAq	Aqww-'aQKK 	g:7eTUKK 177mG#9!!GBK	GAMM'6w%PQa4Kr:   sign	logabsdetc                    t        | d       t        | dd       | j                  }| j                  |d d       }| j                  |d d t	        | j
                              }t        j                  |t        |d      | j
                  | j                        }| j                  |d d t        j                        }||||fS )Nzlinalg.slogdetFr  r   r'  r   )r   r   r   r   r`   r^   rV   r(  r   r   rC  )r  r   rm  rn  rJ  r/  s         r8   _linalg_slogdetrp    s     a)*1.6GGE;;uSbz"DE#2Joagg.FGI			*5%8ggxx	
B [[s5;;[7FB&&r:   full_matrices
compute_uvdriverc                 b   t        | d       t        | d       t        | j                  d d       }| j                  d   }| j                  d   }t	        ||      }|r|||r|n|gz   }| j                  |      }	|	j                  |t        |d             ||r|n||gz   }
| j                  |
      }t        |       dk(  }|j                  |
t        |
|             n$| j                  dg      }	| j                  dg      }| j                  ||gz   t        | j                              }|	||fS )	Nz
linalg.svdr  r   Fr  r   r   r   )r  r   r   r   r   r   r   r   r   r`   r^   )r  rq  rr  rs  r   rf  r  rH  U_shaper  V_shapeVr  Ss                 r8   _linalg_svd_metary    s#    !\"1l+aggcrl#J	A	AAq	A11==KK 	g:7eTU]1==KK 
 a.F*	g:7gVW KKKK 	
J!$OAGG,DEAa7Nr:   arg1arg2c                    | j                   d d }|j                   d d }t        ||      }t        |      }|| j                  d      | j                  d      gz  }t        |      }||j                  d      |j                  d      gz  }||fS )Nr  r   )r   r%   r   r   )rz  r{  arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizes          r8   r?  r?    s    
 zz#2zz#2,-=?OP012		"66012		"66---r:   c                     |rt        | ||       t        | |      \  }}|| j                  k(  r| n| j                  |      }||j                  k(  r|n|j                  |      }||fS rD   )r  r?  r   expand)rz  r{  r  r  r  arg1_broadcastedarg2_broadcasteds          r8   r
  r
  &  sv     tT40)EdD)Q&& !DJJ.DKK@P4Q  !DJJ.DKK@P4Q  ---r:   r   c                     | j                   d d }|j                  dk(  xs- | j                  dz
  |j                  k(  xr |j                   |k(  }|S )Nr   r3   )r   r   )r   r   expected_batched_rhs_shapevector_cases       r8   linalg_solve_is_vector_rhsr  :  sS    !&Sb!1**/ 

Q%**$R8R)R  r:   )r   r  r  rJ  r/  r0  c                    t         d       t        j                   j                  j                  k(   fd       t	               }|rj                  d      n}	t         |	|d       t        |	       \  }
}t        j                  |xs | d        |r|
d d n|
}t        j                  |t        ||       j                  j                        } j                  }t        j                  |t        |d       j                   j                        } j                  |d d t        j                        } j                  |d d t        j                        }||||f}||||f}t        d	 |D              rbt        ||      D ]S  \  }}t!        ||j                         |j#                  |j                  |j%                                t'        ||d
       U |S )Nzlinalg.solvec                  >    d j                    dj                    dS )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type rQ  r8  r   )r  r  s   r8   re   z"_linalg_solve_ex.<locals>.<lambda>Q  s%    Ywwiqwwix9 r:   r   c                       	 y)Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r4   r4   r:   r8   re   z"_linalg_solve_ex.<locals>.<lambda>\  s    K r:   r'  Fr   r  c              3   $   K   | ]  }|d u 
 y wrD   r4   )rr   r=   s     r8   rt   z#_linalg_solve_ex.<locals>.<genexpr>s  s     
&Q1D=
&s   )	copy_fromcopy_toexact_dtype)r   rV   rg   r^   r  	unsqueezer  r?  r(  r   r   r   r   rC  allzipr!   r   r   r#   )r  r  r   r  r  rJ  r/  r0  r  B_B_broad_shaperR   result_shaperesult_r   LU_pivots_info_r   resra  os   ``                    r8   _linalg_solve_exr  B  s    1n-	LL	177	
 -Q2K'RQBaT>23B:M1	LLK	
 *5="%-L!!*<TBggxx	G GGE


*5%8ggxx	C kk%*EKKk8GKKcr
%++K6E2vt
$CC%
(C

&#
&&SM 	FDAqa)MM!''188:.QuE	F Jr:   )r   unitriangularr   r  r   c                   || j                  dg      }t        |t              st        dt	        |             t        | ||d       t        || d       \  }}|j                  dd      j                         xr |j                         }|rt        ||j                        }|S t        ||j                        r=|j                  |j                  dd      j                         |j                  dd       |S )Nr   zout must be TensorLike, got zlinalg.solve_triangularr  r   )r   rp   r   r   rz   r  r
  r   rd  is_conjr!   r   r"   r   
transpose_)	r  r  r  r   r  r   r  A_avoid_copy_As	            r8   linalg_solve_triangular_metar  }  s     {kk1#c:&;DI;GHHaD";<.q!T:FB<<B'557HBJJLLRXX. J  RXX.KKR,223NN2r"Jr:   XM)r  r   c                     t        j                   j                  dk\   fd       t        j                  j                  dk\  fd       t         d       j                  t         j
                  k(  rt               \  }}t        j                  |t        |d       j                   j                        }t        j                  |t        |d      j                  j                        }||fS j                  t         j                  k(  sj                  t         j                  k(  r+t        j                         } j                  dg      }||fS t        j                  dd	        fS )
Nr   c                  $    d j                    dS )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r  r  r   s   r8   re   z'triangular_solve_meta.<locals>.<lambda>  s    ))$79 r:   c                  $    d j                    dS )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r  r  r  s   r8   re   z'triangular_solve_meta.<locals>.<lambda>  r6  r:   triangular_solveFr  r'  r   c                       y)Nz+triangular_solve: Got an unexpected layout.r4   r4   r:   r8   re   z'triangular_solve_meta.<locals>.<lambda>  rm   r:   )rV   rg   r   r  r   stridedr?  r(  r   r^   r   
sparse_csr
sparse_bsrr   r   )	r   r  r  r   r  self_broadcast_sizeA_broadcast_sizesolutioncloned_coefficients	   ``       r8   triangular_solve_metar    sL    
LL		Q	
 
LL	!	
 4$67xx5== 0LTST0U--&&$./BeT**;;	
 #00!./?5Q''88	
 ''' 
U%%	%U5E5E)E##D)!^^QC0 ''' 	UQR'''r:   c                 l   t        | d       t        | d       | j                  | j                  d d       }| j                  | j                        }|j	                  | j                  t        | j                  d             | j                  | j                  d d t        j                        }|||fS )Nz
linalg.detr  Fr  r   r   r,  )r  detrJ  r/  s       r8   _linalg_det_metar    s    a&1l+
++aggcrl
#C	
QWW	BNN17775QR[["U[[[9FF?r:   c                 0    t        j                   j                  dk\  d        t        j                  j                  dk\  d        |rdndt        j                  j                     j                  d   k\  fd       t        j                  j                      j                  d   k(  fd       t        j                  j                  d    j                  d   k  d        t        j                   j                  j                  z
  d	k(   fd
       t        j                   j                  j                  k(   fd        j                  dkD  re j                  d d }j                  d d t        j                  |k(  fd       j                  d d t        j                  |k(  fd       t        j                  j                   j                  k(   fd       t        j                  j                   j                  k(   fd       t        d d       t        d d       t        j                  j                  t        j                  d      j                  j                        S )Nr   c                       y)Nz3torch.ormqr: input must have at least 2 dimensions.r4   r4   r:   r8   re   zormqr.<locals>.<lambda>  rm   r:   c                       y)Nz3torch.ormqr: other must have at least 2 dimensions.r4   r4   r:   r8   re   zormqr.<locals>.<lambda>  rm   r:   r  r   c                      d  dS )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r4   left_size_conditions   r8   re   zormqr.<locals>.<lambda>  s    +,?+@@pq r:   c                      d  dS )Nr  z"] must be equal to input.shape[-2]r4   r  s   r8   re   zormqr.<locals>.<lambda>  s    +,?+@@bc r:   c                       y)NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r4   r4   r:   r8   re   zormqr.<locals>.<lambda>  rm   r:   r3   c                  <    dj                    d j                    S )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r  r  r   s   r8   re   zormqr.<locals>.<lambda>  r!  r:   c                  <    dj                    d j                    S )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r  r  r   r   s   r8   re   zormqr.<locals>.<lambda>  s+    ++0::,6RSXS]S]R^` r:   c                      d  S )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r4   r#  s   r8   re   zormqr.<locals>.<lambda>
  r%  r:   c                      d  S )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r4   )actual_batch_other_shapes   r8   re   zormqr.<locals>.<lambda>  s    66N5OQ r:   c                  <    d j                    dj                    S )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype r   r   s   r8   re   zormqr.<locals>.<lambda>  s'    ##(;;-/B399+O r:   c                  <    d j                    dj                    S )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype r   r  s   r8   re   zormqr.<locals>.<lambda>"  s'    ##(;;-/DU[[MS r:   ztorch.ormqrr  r   Fr  r'  )	rV   rg   r   r   r^   r  r(  r   r   )	r   r  r   r   r   expected_batch_shaper  r$  r  s	   ```   @@@r8   ormqrr    s    
LL

aV 
LL

aV !%""	LL'(CIIbM9q 
LL'(EKKO;c
 
LL		"R(Z
 
LL

SXX"	
 
LL

ejj 	
 zzA~${{3B/!$3B"&::	
 $);;s#3 $(<<	
 
LL		U[[ 	
 
LLu{{"	
 M3u5M5%9[[*5;;%Hkk||	 r:   c                r    t        j                  t              dz  k(  fd        j                  }|dz   k(  }|}| }|r*t	        d|      D ]  }|xr  j                  |      dk7  } n(t	        |      D ]  }|xr  j                  |      dk7  } t        j                  |xs | fd       y )Nr   c                  ,    dd z   dt               S )Nzpadding size is expected to be r   z, but got: r   )r   paddings   r8   re   z,_padding_check_valid_input.<locals>.<lambda>6  s    1!c'+c'l^T r:   r3   r   c                  :    d dz    d dz    dj                    S )N	Expected r3   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: r9  )r   r   s   r8   re   z,_padding_check_valid_input.<locals>.<lambda>K  s2    aycAgY /AAFO r:   )rV   rg   r   r   r   r   )r   r  r   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder   s   ```     r8   _padding_check_valid_inputr  3  s    	LLGCT
 

I#'*M$,,q)$ 	GA/FEJJqMQ4F	G y! 	OA#7#NEJJqMQ<N 	O 
LL00	
r:   c                   	
 d}dd} j                   dk(  r j                  d      }dz  |dz  }t         |d       |\  	
 j                  |      } j                        	z   
z   |r&t        j                  	k  xr 
k   	
fd       t        j                  dk\  fd        j                   dk(  r j                  |f      S  j                  ||f      S )Nr   r3   r0   r   c                  4    d d d  dj                    S NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (rx   ) at dimension 
 of input r9  dim_wr   pad_lpad_rs   r8   re   z_pad1d_common.<locals>.<lambda>g  2    %%*G2eWOE7*UZU`U`Tac r:   c                      d  d S )Nz
input (W: z%) is too small. Calculated output W: r4   )input_woutput_ws   r8   re   z_pad1d_common.<locals>.<lambda>o  s    *WI%J8*U r:   r   )r   r   r  rV   rg   r   )r   r  is_reflection	dim_planenbatchnplaner  r  r  r  r  s   `     @@@@@r8   _pad1d_commonr  R  s    IEFzzQA
Q	ug15LE5ZZ	"FjjG&HGO/	
 
LLAU
 zzQ1229::r:   c                     t        | |d      S NTr  )r  r   r  s     r8   meta_reflection_pad1dr  x       t<<r:   c                      t        j                   j                  t         j                  k7   fd       t	         |d      S )Nc                  @    d j                   j                          dS )Nz)"replication_pad1d" not implemented for ''r^   __str__r   s   r8   re   z(meta_replication_pad1d.<locals>.<lambda>       =ekk>Q>Q>S=TTUX r:   Fr  )rV   rg   r^   boolr  r  s   ` r8   meta_replication_pad1dr  ~  5     
LLuzz!X u==r:   c                    d|s#t        j                  t        |      dk(  d        j                  dk(  rdz  |\  j	                        }|z   z   |r&t        j                  |k  xr |k  fd       t        j                   j	                        k(   fd       j                  j                        S )Nr3   r   c                       y)Nz padding size is expected to be 2r4   r4   r:   r8   re   z(_pad1d_backward_common.<locals>.<lambda>  rm   r:   r0   c                  4    d d d  dj                    S r  r9  r  s   r8   re   z(_pad1d_backward_common.<locals>.<lambda>  r  r:   c                  2    d dj                          S Nz(grad_output width unexpected. Expected: , Got: r   r  grad_outputr  s   r8   re   z(_pad1d_backward_common.<locals>.<lambda>  "    :8*GKL\L\]bLcKde r:   )rV   rg   r   r   r   r   r   )	r  r   r  r  r  r  r  r  r  s	   ``   @@@@r8   _pad1d_backward_commonr     s    ES\Q&(RSzzQ
LE5jjG&HGO/	
 
LLK$$U++e
 ??5;;''r:   
grad_inputc                      t        | ||d      S r  r   r  r   r  s      r8   meta_reflection_pad1d_backwardr    s     "+ugTRRr:   c                      t        | ||d      S )NFr  r  r  s      r8   meta_replication_pad1d_backwardr    s     "+ugUSSr:   c                   	
 ddd}d}t         |d        j                  }|dk(  r  j                  d      }dz  dz  |dz  }|\   j                  |      } j                        	 j                        
	z   z   
z   z   |rLt        j                  
k  xr 
k   fd       t        j                  	k  xr 	k   fd       t        j                  dk\  xs dk\  	
fd        j                  d	k(  r j                  |f      S  j                  ||f      S )
Nr   r3   r   r      c                  4    d d d  dj                    S r  r9  r  s   r8   re   z_pad2d_common.<locals>.<lambda>  r  r:   c                  4    d d d  dj                    S NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (rx   r  r  r9  dim_hr   pad_bpad_ts   r8   re   z_pad2d_common.<locals>.<lambda>  r  r:   c                       d  d d d S )Nz
input (H:  W: z%) is too small. Calculated output H: r4   )input_hr  output_hr  s   r8   re   z_pad2d_common.<locals>.<lambda>  s*    	gY /$$,:T(= r:   r0   r  r   r   rV   rg   r   )r   r  r  
dim_slicesr  r   r  r  r  r  r  r  r  r  r  r  r  s   `      @@@@@@@@@@r8   _pad2d_commonr    sU   EEJFug15::DqyA

a
!(E5%ZZ
#FjjGjjG&H&HGO/	
 	GO/	
 
LLA&Q	
 zzQ(;<<(CDDr:   c                     t        | |d      S r  )r  r  s     r8   meta_reflection_pad2dr    r  r:   c                      t        j                   j                  t         j                  k7   fd       t	         |d      S )Nc                  @    d j                   j                          dS )Nz)"replication_pad2d" not implemented for 'r  r  r  s   r8   re   z(meta_replication_pad2d.<locals>.<lambda>  r  r:   Fr  )rV   rg   r^   r  r  r  s   ` r8   meta_replication_pad2dr    r  r:   c                 ^    t        j                  |      }t        j                  |      }||fS rD   rV   r   )grad_wsaved_vsaved_gsaved_normsr   grad_vgrad_gs          r8   meta_weight_norm_backwardr%    s.     g&Fg&F6>r:   c                     ddd}|j                   }|j                         dk(  rdz  dz  |dz  }|\  }}}}|   }	|   }
|	|z   |z   |
|z   |z   t        j                   j	                        k(   fd       t        j                   j	                        k(   fd       |j                  |j                         S )Nr   r3   r   r	  c                  2    d dj                          S r  r   r  s   r8   re   z%meta_pad2d_backward.<locals>.<lambda>  r  r:   c                  2    d dj                          S Nz)grad_output height unexpected. Expected: r  r   r  r  r  s   r8   re   z%meta_pad2d_backward.<locals>.<lambda>!  "    ;H:W[M]M]^cMdLef r:   )r   r   rV   rg   r   r   )r  r   r  r  rd   r  r  r  r  r  r  r  r  r  r  s   `          @@@@r8   meta_pad2d_backwardr,     s     EEIJxxzQ

Q	!(E5%GG&H&H	LLK$$U++e 
LLK$$U++f >>$**%%r:   c          	      $   	
 d	ddd}t         |d        j                  dk(  }|r% j                  d      }	dz  	dz  dz  |dz  }|\   j                  |      } j                        
 j                         j                  	      
z   z   z   z   z   z   |rrt        j                  k  xr k  	 fd       t        j                  k  xr k   fd       t        j                  
k  xr 
k   fd	       t        j                  dk\  xs dk\  xs dk\  
fd
       |r j                  |f      S  j                  |f      S )Nr0   r   r3   r   r      c                  4    d d d  dj                    S r  r9  r  s   r8   re   z_pad3d_common.<locals>.<lambda>C  r  r:   c                  4    d d d  dj                    S r  r9  r  s   r8   re   z_pad3d_common.<locals>.<lambda>J  r  r:   c                  4    d d d  dj                    S )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (rx   r  r  r9  )dim_dr   pad_bkpad_fs   r8   re   z_pad3d_common.<locals>.<lambda>Q  s2    %%*G2fX_UG:V[VaVaUbd r:   c                  ,    d  d d d d d S )Nz
input (D:  H: r  z%) is too small. Calculated output D: r4   )input_dr  r  output_dr  r  s   r8   re   z_pad3d_common.<locals>.<lambda>Y  s7    	gYd7) <$$,:T(4zK r:   r  )r   r  r  r  
batch_moder  r  r2  r  r  r7  r  r  r8  r  r  r  r3  r4  r  r  r  s   `      @@@@@@@@@@@@@@@r8   _pad3d_commonr:  &  s   EEEIug15qJA


Q	07-E5%vZZ	"FjjGjjGjjG'H&H&HGO/	
 	GO/	
 	GO0 0	
 
LLA7Q7(a-	
 	
 (HMNN(HEFFr:   c                     t        | |d      S r  )r:  r  s     r8   meta_reflection_pad3dr<  e  r  r:   c                      t        j                   j                  t         j                  k7   fd       t	         |d      S )Nc                  @    d j                   j                          dS )Nz)"replication_pad3d" not implemented for 'r  r  r  s   r8   re   z(meta_replication_pad3d.<locals>.<lambda>p  r  r:   Fr  )rV   rg   r^   r  r:  r  s   ` r8   meta_replication_pad3dr?  k  r  r:   c                 T    t        j                  t        |      dk(  d        |j                  dk  rt	        d|j                          j                  |j                  k7  r%t	        d j                   d|j                         ddd|j                  d	k(  rdz  dz  dz  |\  }}}}}}|j                        }	|j                        }
|j                        }|	|z   |z   |
|z   |z   ||z   |z   t        j                   j                        k(   fd
       t        j                   j                        k(   fd       t        j                   j                        k(   fd       |j                  |j                        S )N   c                       y)Nz padding size is expected to be 6r4   r4   r:   r8   re   z%meta_pad3d_backward.<locals>.<lambda>  rm   r:   r0   zinput.ndim must be > 3, got z,grad_output.ndim must equal input.ndim, got  != r   r3   r.  c                  2    d dj                          S r  r   r  s   r8   re   z%meta_pad3d_backward.<locals>.<lambda>  r  r:   c                  2    d dj                          S r)  r   r*  s   r8   re   z%meta_pad3d_backward.<locals>.<lambda>  r+  r:   c                  2    d dj                          S )Nz(grad_output depth unexpected. Expected: r  r   )r2  r  r8  s   r8   re   z%meta_pad3d_backward.<locals>.<lambda>  r  r:   )rV   rg   r   r   r   r   r   r   )r  r   r  r  r  r  r  r4  r3  r7  r  r  r2  r  r  r8  r  r  s   `           @@@@@@r8   meta_pad3d_backwardrG  u  s    
LLW"$NOzzQ;EJJ<HII5::%:;;K;K:LDQVQ[Q[P\]
 	
 EEEzzQ


07-E5%vjjGjjGjjG'H&H&H	LLK$$U++e 
LLK$$U++f 
LLK$$U++e
 ??5;;''r:   pc                 J   t        j                  | j                         d        | j                  d      }|dk  r0| j	                  dg      j                  t         j                        S | j	                  ||dz
  z  dz  f      j                  t         j                        S )Nc                       y)Nz(_pdist_forward requires contiguous inputr4   r4   r:   r8   re   z%meta__pdist_forward.<locals>.<lambda>  rm   r:   r   r3   r   r   )rV   rg   rd  r   r   r'  r  )r   rH  r  s      r8   meta__pdist_forwardrK    s     
LLP 			!AAv~~qc"%%E4R4R%SS~~qAE{a/125588 6 
 	
r:   gradpdistc                     t        j                  |j                         d        t        j                  |j                         d        t        j                  |t         j                        S )Nc                       y)Nz._pdist_backward requires self to be contiguousr4   r4   r:   r8   re   z&meta__pdist_backward.<locals>.<lambda>  rm   r:   c                       y)Nz/_pdist_backward requires pdist to be contiguousr4   r4   r:   r8   re   z&meta__pdist_backward.<locals>.<lambda>  rm   r:   r   )rV   rg   rd  r   r  )rL  r   rH  rM  s       r8   meta__pdist_backwardrQ    sW     
LLV 
LLX D0N0NOOr:   )rV  rU  c          
      0    ddl m}m} j                  d      }j                  d      }j                  d      }	 |t	        j
                   | j                  |||	f                  r j                  |||	f       t	        j                  j                         dk(  d        t	        j                  j                         dk(  d        t        j                  sGt	        j                   j                  j                  cxk(  xr j                  k(  nc  fd       j                  }
j                  |
d   |
d   t	        j                  d   k(  xr d   k(  fd	        j                   j                               S )
Nr   )guard_or_truesym_eqr3   r   r0   c                       yNzbatch1 must be a 3D tensorr4   r4   r:   r8   re   zmeta_baddbmm.<locals>.<lambda>  rm   r:   c                       yNzbatch2 must be a 3D tensorr4   r4   r:   r8   re   zmeta_baddbmm.<locals>.<lambda>  rm   r:   c                  V    dj                    d j                    dj                    S )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: r   )batch1batch2r   s   r8   re   zmeta_baddbmm.<locals>.<lambda>  s0    A$**ZX^XdXdWeeopvp|p|o}~ r:   c            	      .    d d d d    d d    d	S Nz@Expected size for first two dimensions of batch2 tensor to be: [rx   z] but got: [r   r3   ].r4   batch2_sizesbscontraction_sizes   r8   re   zmeta_baddbmm.<locals>.<lambda>  s:    t2&'|LO3DB|TUFWWY[ r:   )r$  rS  rT  r   rV   sym_notr   r  rg   r   
exp_config&skip_dtype_check_in_meta_registrationsr^   r   )r   rZ  r[  rV  rU  rS  rT  dim1dim2dim3batch1_sizesr`  ra  rb  s   ```        @@@r8   meta_baddbmmrj    s7    L;;q>D;;q>D;;q>DU]]6$**tT46H#IJK{{D$-.	LL"$HI	LL"$HI<<JJ&,,6&,,6~	
 <<L<<L	aB#A	LLQ2E,q/5E"E	
 >>$))+&&r:   c                L    t        j                  | t         j                        S r   rm  r   r  s     r8   meta_bernoullirm    s     D0G0GHHr:   c                     | S rD   r4   r   rH  r  s      r8   meta_bernoulli_rp        Kr:   c                 L    t        j                  | t         j                        S r   rm  ro  s      r8   meta_bernoulli_prs    s     D0G0GHHr:   c                 ,    t        j                  |       S rD   r  rl  s     r8   meta_poissonru         D!!r:   c                     t        j                  |
| j                         k  d        t        j                  | t         j                        }t        j                  |       |fS )Nc                       y)NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r4   r4   r:   r8   re   z6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>	  rm   r:   r   )rV   rg   r   r   r  )r   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmasks                 r8   $meta__fused_moving_avg_obs_fq_helperr    sO      
LL$((*\ D

3DT"D))r:   c                 @   t        j                  | j                         dk(  d        t        j                  |j                         dk(  d        | j                  \  |j                  \  t        j                  k(  fd       |gt        j                  || j                  k(  xsA |t         j
                  k(  xr, | j                  t         j                  t         j                  fv d        || j                  n|}| j                  f|      S )Nr   c                       y)Nza must be 2Dr4   r4   r:   r8   re   zmeta_mm.<locals>.<lambda>	  rm   r:   c                       y)Nzb must be 2Dr4   r4   r:   r8   re   zmeta_mm.<locals>.<lambda>	  rm   r:   c            	      "    d d  d d d	S )Nz/a and b must have same reduction dim, but got [rx   z] X [r^  r4   )M1M2NrC  s   r8   re   zmeta_mm.<locals>.<lambda>	  s(    A!Brd%PRtSUVWUXXZ[ r:   c                       y)NzFout_dtype must be the same as input dtype or fp32 for fp16/bf16 inputsr4   r4   r:   r8   re   zmeta_mm.<locals>.<lambda>%	  rm   r:   r   )	rV   rg   r   r   r^   r`  ra  rb  r   )r6   r7   r=  rS   r  r  r  rC  s       @@@@r8   meta_mmr  	  s     
LLA56	LLA56GGEArGGEB	LL
b[   U]]* ?GGu~~>>\	
 (/177YL;;1v\;22r:   c                      |r(t         fdt         j                        D              S t        j                   j
                        S )Nc              3   H   K   | ]  }|vrj                   |   nd   yw)r3   Nr9  )rr   r   dimsr   s     r8   rt   z+_compute_reduction_shape.<locals>.<genexpr>-	  s$     UqatmTZZ]:Us   ")rf   r   r   rO   compute_reduction_output_shaper   )r   r  r  s   `` r8   r  r  +	  s7    UE$))DTUUU//

DAAr:   c                    t        | t        j                  j                        r| j                  j
                  S t        | d      rEt        | j                  d      r/| j                  j
                  dk7  r| j                  j
                  S y)Nr   rz   r   r   )rp   rV   _subclasses
FakeTensorfake_devicerz   hasattrr   )r0  s    r8   r   r   6	  sg    &%++667!!&&&!FMM6*MM&(}}!!!r:   input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                    dt         dt         dt         dt         dt         dt         fd}dt         dt         dt         dt         dt         dt         dt         fd	}	|j                  d
d  }
| j                  d
d  |r||j                  d   z  }n<|j                  d   }|j                  d   |z  | j                  d   k7  rt        d      | j                  d   |gt        |t              r|gt              z  }n t        |      dk(  r|d   gt              z  }t        |t              r|gt              z  }n t        |      dk(  r|d   gt              z  }t        |t              r|gt              z  }n t        |      dk(  r|d   gt              z  }d }|rCt        |t              r|gt              z  }n#t        |      dk(  r|d   gt              z  }n|}t        t                    D ]]  }|r/j                   |	|   ||   ||   |
|   ||   ||                4j                   ||   ||   ||   |
|   ||                _ ddlm	} ddl
m} t        | |      r| j                  n| j                  }|j                  dk(  xr t        j                   j"                  d u }|s1t        j$                   |d
d  D cg c]  }|dkD  	 c} fd       S c c}w )NlnrH  r   rH  r  r@   c                 6    | d|z  z   ||dz
  z  z
  dz
  |z  dz   S )a  
        Formula to apply to calculate the length of some dimension of the output

        See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
        Returns:
            The output length
        r   r3   r4   )r  rH  r   rH  r  s        r8   _formulaz+calc_conv_nd_return_shape.<locals>._formulaM	  s.     QU
Q!a%[(1,2Q66r:   rE   c                 <    | dz
  |z  d|z  z
  ||dz
  z  z   |z   dz   S )a  
        Formula to apply to calculate the length of some dimension of the output
        if transposed convolution is used.
        See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

        Args:
            ln: length of the dimension
            p: padding in that dim
            d: dilation in that dim
            k: kernel size in that dim
            s: stride in that dim
            op: output padding in that dim

        Returns:
            The output length
        r3   r   r4   )r  rH  r   rH  r  rE   s         r8   _formula_transposedz6calc_conv_nd_return_shape.<locals>._formula_transposed^	  s2    " Q!|a!e#a1q5k1B6::r:   r   r3   r   zInvalid channel dimensions)r  )sym_orr   c                  .    dt                ddd   dS )NzGiven input size per channel: z&. Calculated output size per channel: r   z. Output size is too small)r   )r  	ret_shapes   r8   re   z+calc_conv_nd_return_shape.<locals>.<lambda>	  s,    4T$ZL A33<QR=/ B'( r:   )r   r   r&  rp   r   r   r   r   torch._subclasses.fake_tensorr  r$  r  r  r   rz   rV   versionhiprg   )r  r9  r   r  r  r  r  r  r  r  kernel_sizeout_channelsoutput_padding_listr   r  r  r   is_cudnnr=   r  r  s                      @@r8   calc_conv_nd_return_shaper  C	  s   7S 7S 7S 7S 7S 7S 7"; ; ; ; ; ; ;QT ;& ,,qr"Kab!DQ/||A<<?V#|'9'9!'<<;<<##A&5I&'"CI%	V	)s4y('7#)c$i'	W	1:,T*(G$:D	)	X!	QK=3t9,,0ng.#1"2SY"> A%#1!#4"5D	"A"03t9 #GAJ QKN1I'* a'!*hqk;q>6RS9U%6 9< lJ/ 	      {{f$B):):d)BHIabM2qQU23(	
  3s   K&c                 b    t         j                  j                  |       t         j                  k(  S rD   rV   _prims_commonr   channels_lasttens    r8   is_channels_lastr  	  s$    44S9U=P=PPPr:   running_meanrunning_vartrainingexponential_average_factorepsilonc                 r     j                   }||j                   n|j                   }	||j                   n|j                   }
 fd} j                  |      j                   |             }|r# j                  |	      } j                  |
      }n" j                  d      } j                  d      }|||fS )Nc                      t               rt        j                  S  j                  t        j                        rt        j                  S t        j                  S r   )r  rV   r  rd  r   )r  s   r8   pick_memory_formatz2meta_miopen_batch_norm.<locals>.pick_memory_format	  sF    L)&&&%%E4K4K%L***&&&r:   r   r   )r   r   r'  )r  r9  r;  r  r  r  r  r  r   save_mean_shapesave_var_shaper  r   	save_meansave_vars   `              r8   meta_miopen_batch_normr  	  s     ""I -9,Dl((&,,O*5*A[&&v||N' 
 
 
+
.
.=O=Q
.
RC **?;	)).9 **40	))$/	8##r:   c	           
          t        | |||||||r|nd       }	d}
d}| j                  |
      dk(  rd|	|<   | j                  |	      }|S Nr3   r   )r  r   r   )r  r9  r;  r   r  r  r  r  r  	shape_outinput_channels_dimoutput_channels_dimr   s                r8   	meta_convr  	  sl     *'T	I +,1)*	%&

 
 
+CJr:   mkldnnc
           
          t        | ||||d|g       }
| j                  |
      }t        j                  }| j	                         dk(  rt        j
                  }|j                  |      }|S )NFr.  r   )r  r   rV   r  r   channels_last_3dr'  )r  r9  r;  r  r   r  r  attrscalars	algorithmr  r   out_memory_formats                r8   meta_mkldnn_convolution_defaultr  
  sp     .&&'8UFB
	 $$Y/!//" % 6 6ff#4f5
r:   c                 b    | j                  g | j                  d d |j                  d         S Nr   r   r   r   )r  r9  r;  r  r  r  s         r8   meta_linear_pointwise_defaultr  .
  s5     %%&Q(:(:3B(?&Qa&QRRr:   mklc                 b    | j                  g | j                  d d |j                  d         S r  r  )r  packed_weightorig_weightr;  r   s        r8   meta_mkl_linearr  9
  s:    ))@,$$Sb)@;+<+<Q+?@ r:   onednnc           
         t        | ||||	d|
d       }|| j                  }|t        j                  t        j                  t        j
                  t        j                  t        j                  fvrt        d|       | j                  ||      }t        |      dvrt        dt        |       d      t        j                  t        j                  t        j                  dt        |         }|j                  |      }|S )NFOoutput_dtype must be one of float32, bfloat16, uint8, int8, float8_e4m3fn, got r   )r0   r	  r.  z3Expect output to be 3d/4d/5d for conv1d/2d/3d, got r   r   )r  r^   rV   r`  rb  uint8rB  rc  r   r   r   r   r  r  r'  )r=   x_scalex_zpww_scalew_zpr;  r   r  r  r  output_scaleoutput_zero_pointoutput_dtyper  r  r  r  r   formats                       r8   meta_qconv_pointwiser  C
  s   , .	
	 77LMMNNKKJJ 
 
 !abnaop  kk)<k8y>* Ec)nEUUVW  &&""%%
 i.	
 ff6f*
r:   c                 .    |dk7  rt        d| d      |S )Nsumz#binary_op_name must be 'sum', got 'r  r   )r=   r  r  r  r  r  accumr;  r   r  r  r  r  r  r  accum_scaleaccum_zero_pointbinary_op_namerU  unary_op_nameunary_op_argsunary_op_algorithms                         r8   meta_qconv2d_pointwise_binaryr  |
  s-    4 U" 5n5EQG  r:   c                 4   t        | j                        }|j                  d   |d<   |	t        j                  t        j                  t        j
                  t        j                  t        j                  fvrt        d|	       | j                  ||	      }|S )Nr3   r   zOoutput_dtype must be one of float32, bfloat16, int8, uint8, float8_e4m3fn, got r   )
r   r   rV   r`  rb  rB  r  rc  r   r   )r=   r  r  r  r  r  r;  r  r  r  post_op_namepost_op_argspost_op_algorithmrg  r   s                  r8   meta_qlinear_pointwiser  
  s    " AGG}771:RMMNNJJKK 
 
 !abnaop  kk,lk;
r:   c                 B   |dk(  r|S t        | j                        }|j                  d   |d<   |
t        j                  t        j                  t        j
                  t        j                  t        j                  fvrt        d|
       | j                  ||
      }|S )Nr  r3   r   r  r   )
r   r   rV   r`  rb  r  rB  rc  r   r   )r=   r  r  r  r  r  x_2r;  r  r  r  x2_scalex2_zpr  rU  r  r  r  rg  r   s                       r8   meta_qlinear_pointwise_binaryr  
  s    , U"JAGG}771:RMMNNKKJJ 
 
 !abnaop  kk,lk;
r:   c                 v    t        | j                        }|j                  d   |d<   | j                  |      }|S )Nr3   r   )r   r   r   )r=   r  r;  rg  r   s        r8   meta_linear_dynamic_fp16r  
  s6     AGG}771:Rkk,'
r:   	quantizedr  r3   c                 .   t        | |||||      \  }}}| j                         dk(  r| j                  d      nd}	t        j                  }
| j                         dk(  r|||g}n|	|||g}t        j
                  || j                  | j                  |
      S Nr	  r3   r0   r~  )#max_pool2d_checks_and_compute_shaper   r   rV   r  r   r^   r   r   r  r   r  r  	ceil_modenInputPlaneoutputHeightoutputWidthr  r   r   s               r8   meta_quantized_max_pool2dr  
  s     0;9
		
 $)99;!#3B++99;!{;DK{CD{{++<<'	
 	
r:   c                     t        j                   j                         dk(   fd       t        j                  j                         dk(  fd       t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  k(  fd       t        j                  j                  t         j                  k(  fd       t        j                  j                   j                  k(  fd        j                   j                  d      j                  d       j                  	      S )
Nr   c                  ,    d j                          dS )Nzx must be a 2D tensor, got rM  r   r=   s   r8   re   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>      -H	QR+S r:   c                  ,    d j                          dS )Nzw must be a 2D tensor, got rM  r   r  s   r8   re   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  r  r:   c                  "    d j                    S Nz#expected x to be f32/f16/bf16, got r   r  s   r8   re   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  s    9!''C r:   c                  "    d j                    S Nzexpected w to be uint8, got r   r  s   r8   re   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>  s    .J177),T r:   c                  "    d j                    S )Nz q_group_size must be int64, got r   )q_group_sizes   r8   re   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>   s    6|7I7I6JK r:   c                  "    d j                    S )Nz5q_scale_and_zeros must have the same dtype as x, got r   )q_scale_and_zeross   r8   re   z/meta_int4mm_packed_weight_cpu.<locals>.<lambda>$  s    KL]LcLcKde r:   r   r   )rV   rg   r   r^   r`  ra  rb  r  r   r   r   r=   r  r  r  s   ````r8   meta_int4mm_packed_weight_cpur    s    QUUW\#STQUUW\#STGGu}}ennEEC	
 	GGu{{"$T	
 	%++-K	
 	##qww.e	
 {{166!9affQiqww{??r:   c                      t        j                   j                         k(  xr  j                     k(   fd       y )Nc                  j    d  d d ddj                          d dj                      z   S )NzExpected a tensor of dimension z and tensor.size[z] == rx   zbut got : dimension z] = r   r   )r   dim_sizer   r0  s   r8   re   z check_dim_size.<locals>.<lambda>-  sP    1#6GzQVW[V\\^_ .?zfll[cNdMe
fg r:   )rV   rg   r   r   )r0  r   r  r   s   ````r8   check_dim_sizer   *  s6    	LL

>X 6$ >	gr:   c                     d } |d|      \  }}	t        j                  t        |      dv d        t        j                   j                  t         j                  t         j
                  t         j                  t         j                  fv fd       t        |      dk(  r||	}}
n%t        |      dk(  r|d   |d   }}
n |d|      \  }
} |d	|      \  }}t        j                  |d u xs |dk7  d
         j                         dk(  r j                  d      nd} j                  d      } j                  d      } j                  d      }t        ||||
d|      }t        ||	||d|      }t        j                         }t         ||	|
|||dd||||||        j                         dk(  r|||g}n||||g}t        j                  | j                   j                  |      S )Nc                      t        j                  t        |      dv  fd       |d   }t        |      dk(  r|n|d   }||fS )Nr3   r   c                      d  dS )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr4   r  s   r8   re   z1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>?      l4&(\] r:   r   r3   rV   rg   r   r  r  HWs   `   r8   unpackzmeta_avg_pool2d.<locals>.unpack<  G    H]	
 FSQACF!tr:   r  r   r3   r   c                       yNzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr4   r4   r:   r8   re   z!meta_avg_pool2d.<locals>.<lambda>H  rm   r:   c                  @    d j                   j                          dS )Nz""avg_pool2d" not implemented for 'r  r  r  s   r8   re   z!meta_avg_pool2d.<locals>.<lambda>L      6u{{7J7J7L6MQQ r:   r   r3   r   r  c                       yNzdivisor must be not zeror4   r4   r:   r8   re   z!meta_avg_pool2d.<locals>.<lambda>Y  rm   r:   r	  r  r  r   r0   r~  )rV   rg   r   r^   r  uint16uint32uint64r   r   pooling_output_shaperO   r   pool2d_shape_checkr   r   )r   r  r   r  r  count_include_paddivisor_overrider,  kHkWdHdWpadHpadWr  r  inputHeight
inputWidthr	  r
  r   r   s   `                     r8   meta_avg_pool2drE  2  s    M;/FB	LLFy a 
LLEKKu||U\\RRQ 6{aRB	V	F1IB&)B	7+JD$	LLD 9$4$9*
  %yy{a/UZZ^QF**R.K**R.KBJ'Rr1iPL&z2tRINK//6M



		$ yy{a\;7\;?;;kk||#	 r:   c                     t        | ||||||dd|	|
||||       | j                         }|	}t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr3   r0   r   )r:  r   r   )r   
gradOutputr  r=  r>  r?  r@  rA  rB  r  rC  rD  r	  r
  
mem_formatr   nOutputPlanes                    r8   avg_pool2d_backward_shape_checkrJ    s    " 



		$ 99;DL:tTAX|<:tTAX|<:tTAX{;r:   c                    t        j                  t        |      dk(  xs t        |      dk(  d        |d   }t        |      dk(  r|n|d   }	t        j                  t        |      dk(  xs t        |      dk(  xs t        |      dk(  d        t        |      dk(  r|n|d   }
t        |      dk(  r|	nt        |      dk(  r|
n|d   }t        j                  t        |      dk(  xs t        |      dk(  d        |d   }t        |      dk(  r|n|d   }t        j                  |d u xs |dk7  d        |j                  }|j	                         dk(  r|d	   nd}|d
   }|d   }|d   }t        ||||
d|      }t        ||	||d|      }t        j                  |      }t        || |||	|
|||||||||       t        j                  ||j                  |j                  |      S )Nr3   r   c                       y)NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr4   r4   r:   r8   re   z*meta_avg_pool2d_backward.<locals>.<lambda>  rm   r:   r   c                       yr0  r4   r4   r:   r8   re   z*meta_avg_pool2d_backward.<locals>.<lambda>  rm   r:   c                       y)NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr4   r4   r:   r8   re   z*meta_avg_pool2d_backward.<locals>.<lambda>  rm   r:   c                       yr4  r4   r4   r:   r8   re   z*meta_avg_pool2d_backward.<locals>.<lambda>  rm   r:   r	  r  r5  r  r   r~  )rV   rg   r   r   r   r9  rO   r   rJ  r   r^   r   )gradOutput_r   r  r   r  r  r;  r<  r=  r>  r?  r@  rA  rB  
input_sizer  r  rC  rD  r	  r
  rH  s                         r8   meta_avg_pool2d_backwardrR    s    
LLKA6[!1Q!6] 
QB;1$+a.B	LLFq@CK1,@Fq0@a 6{aVAYB6{a3v;!+;RB	LLG.S\Q.Y 1:Dw<1$4'!*D	LLD 9$4$9*
 J$yy{a/Z^QFR.KR.KBJ'Rr1iPL&z2tRINK,,U3J#



$ ;;kk|| 	 r:   c                     t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }	t        j                  | xs t        |      dv d        t        j                   j                  t         j                  t         j
                  t         j                  t         j                  fv fd       |s|n|d   }
|s|nt        |      dk(  r|
n|d   }|s|	nt        |      dk(  r|
n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                   j                  d	v d
        t        j                  | xs |dk7  d         j                  d      } j                  d      } j                  d      } j                  d      } j                  d      }t        ||||
d|      }t        ||||d|      }t        ||	||d|      }t         ||||	|
|||||ddd||||||dd        j                  dk(  r j                  ||||f      S  j                  |||||f      S )Nr3   r0   c                       yNzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr4   r4   r:   r8   re   z!meta_avg_pool3d.<locals>.<lambda>  rm   r:   r   r3   r   c                       yNzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr4   r4   r:   r8   re   z!meta_avg_pool3d.<locals>.<lambda>  rm   r:   c                  @    d j                   j                          dS )Nz""avg_pool3d" not implemented for 'r  r  r  s   r8   re   z!meta_avg_pool3d.<locals>.<lambda>  r2  r:   c                       yNzBavg_pool3d: padding must be a single int, or a tuple of three intsr4   r4   r:   r8   re   z!meta_avg_pool3d.<locals>.<lambda>  rm   r:   r	  r.  c                       yNz9non-empty 4D or 5D (batch mode) tensor expected for inputr4   r4   r:   r8   re   z!meta_avg_pool3d.<locals>.<lambda>"  rm   r:   c                       yr4  r4   r4   r:   r8   re   z!meta_avg_pool3d.<locals>.<lambda>'  rm   r:   r  r5  r  r   zavg_pool3d()T)check_input_sizer	  )rV   rg   r   r^   r  r6  r7  r8  r   r   r9  pool3d_shape_checkr   )r   r  r   r  r  r;  r<  kTr=  r>  dTr?  r@  padTrA  rB  r  nslicesitimeiheightiwidthotimeoheightowidths   `                       r8   meta_avg_pool3drl    s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ 
LLEKKu||U\\RRQ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 ZZ]FjjnGJJrNEjjnGZZ^F D"aCE"7Bb!YGG!&"dB9EF





			-2 zzQ@AAHIIr:   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }	t        |      dk(  r|n|d   }
t        j                  | xs t        |      dv d        |s|n|d   }|s|	nt        |      dk(  r|n|d   }|s|
nt        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  |j                  dv d	        t        j                  | xs |dk7  d
        |j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }t        ||||d|      }t        ||	||d|      }t        ||
||d|      }t        || |||	|
||||||||||||d       |j                  |j                        S )NrT  c                       yrV  r4   r4   r:   r8   re   z*meta_avg_pool3d_backward.<locals>.<lambda>a  rm   r:   r   r3   r   c                       yrX  r4   r4   r:   r8   re   z*meta_avg_pool3d_backward.<locals>.<lambda>i  rm   r:   c                       yr[  r4   r4   r:   r8   re   z*meta_avg_pool3d_backward.<locals>.<lambda>q  rm   r:   r\  c                       yr^  r4   r4   r:   r8   re   z*meta_avg_pool3d_backward.<locals>.<lambda>y  rm   r:   c                       yr4  r4   r4   r:   r8   re   z*meta_avg_pool3d_backward.<locals>.<lambda>~  rm   r:   r  r5  r  r   zavg_pool3d_backward())	rV   rg   r   r   r   r9  avg_pool3d_backward_shape_checkr   r   )r  r   r  r   r  r  r;  r<  rb  r=  r>  rc  r?  r@  rd  rA  rB  re  rf  rg  rh  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checks                           r8   meta_avg_pool3d_backwardrw  S  s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 jjnGJJrNEjjnGZZ^F0D"aS27Bb!YW1&"dB9U#





', ??5;;''r:   c                 ,    t        j                   j                  dk(  xs  j                  dk(   fd        j                  d d t	        |      z   }t        j                         }t        j                  | j                   j                  |      S )Nr0   r	  c                  "    d j                    S )Nz"Expected 3D or 4D tensor, but got r9  r   s   r8   re   z*meta_adaptive_avg_pool2d.<locals>.<lambda>      4TZZLA r:   r  r~  )
rV   rg   r   r   rf   rO   r   r   r^   r   )r   output_sizerg  r   s   `   r8   meta_adaptive_avg_pool2dr|    s|    	LL		Q($))q.A ::cr?U;%77L//5M ;;jj{{#	 r:   c                      t        j                   j                  dk(  xs  j                  dk(   fd        j                   j                  d d t        |      z         S )Nr	  r.  c                  "    d j                    S )Nz"Expected 4D or 5D tensor, but got r9  r   s   r8   re   z*meta_adaptive_avg_pool3d.<locals>.<lambda>  rz  r:   r5  )rV   rg   r   r   r   rf   )r   r{  s   ` r8   meta_adaptive_avg_pool3dr    sO    	LL		Q($))q.A >>$**Sb/E+,>>??r:   c                      j                   }t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  |dk(  xs |dk(  fd       t        j                  j
                   j
                  k(   fd       t        j                  }t              rt        j                  }j                  j                        j                  |      S )	Nr3   r   c                  *    d j                    d dS )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyr9  )grad_outr   s   r8   re   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s&     66>nn5EEUVWUXXdf r:   r0   r	  c                  "    d j                    S )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got r9  r   s   r8   re   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s    TUYU_U_T`a r:   c                  <    dj                    d j                    S Nexpected dtype z! for `grad_output` but got dtype r   )r  r   s   r8   re   z4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s    /$**-Nx~~N^_ r:   r   )r   r   rV   rg   r   r^   r   r  r  r   r   r'  )r  r   r   r   r   s   ``  @r8   "meta__adaptive_avg_pool2d_backwardr    s    ==D1d^ 
MM!q f	

 
LL	TQYa 
LL

hnn$_ ++M++>>$**%((}(EEr:   c                 d    t        | d       t        j                  |t        j                        S )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrV   r   r  r  r   s     r8   "meta__adaptive_avg_pool3d_backwardr    s(     &k3QRD0N0NOOr:   r  c                       j                   }t        d|      D ]/  t        j                   j	                        dkD   fd       1 y )Nr3   r   c                  .      dj                    d dS )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  r9  )r  r  r   s   r8   re   z3_adaptive_pool_empty_output_check.<locals>.<lambda>  s/    * --8->->,??OPQsR^` r:   )r   r   rV   rg   r   )r  r  r   r   s   `` @r8   r  r    sG    D1d^ 
Q!#	

r:   c                      j                   }t        j                  |dv  fd       t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  t        |      dk(  d        d}d}d} j                   dk(  r j	                  d      }|dz  } j	                  |dz
        }|\  }} j                   d	k(  r;|||f} j                  |      }	 j                  |t        j                  
      }
|	|
fS ||||f}t        j                         } j                  |      j                  |      }	 j                  |t        j                  
      j                  |      }
|	|
fS )Nr0   r	  c                  "    d j                    S )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: r9  r  s   r8   re   z*meta_adaptive_max_pool2d.<locals>.<lambda>      LU[[MZ r:   r3   r   c                  *    dj                    d  dS )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r9  r   r   s   r8   re   z*meta_adaptive_max_pool2d.<locals>.<lambda>  %    '',{{m3CA3lT r:   r   c                       y)NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r4   r4   r:   r8   re   z*meta_adaptive_max_pool2d.<locals>.<lambda>  rm   r:   r	  r0   r   r   )r   rV   rg   r   r   r   r   r   rO   r   r'  )r   r{  r   dimHsizeBsizeDosizeHosizeWr   r   r   r   r   s   `           @r8   meta_adaptive_max_pool2dr    s|    ::D	LLZ 1d^ 
JJqMA	

 
LLKAU
 DEEzzQ

1	JJtax E NFFzzQFF+	ooi(//)5;;/?G|E662	33E:ooi(++-+H//)5;;/?BB' C 
 G|r:   c                 N     j                   }t        j                  |dv  fd       t         d       t        j                  j                   j                  k(   fd       t        j                        }j                  j                        j                  |      S )Nr  c                  "    d j                    S )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: r9  r  s   r8   re   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>  s    ]^i^o^o]pq r:   adaptive_max_pool2d_backwardc                  <    dj                    d j                    S r  r   )r  r   s   r8   re   z3meta_adaptive_max_pool2d_backward.<locals>.<lambda>%  s!    /%++.OP[PaPaObc r:   r   )
r   rV   rg   r  r^   rO   r   r   r   r'  )r  r   r   r   r   s   ``   r8   !meta_adaptive_max_pool2d_backwardr    s     D	LLq
 &k3QR	LL{(((c
 //6M??5;;'***GGr:   c                      j                   }t        j                  |dv  fd       t        d|      D ].  t        j                   j	                        dkD   fd       0 t        j                  t        |      dk(  d        d}d}d}|dk(  r j	                  d      }|dz  } j	                  |      }|\  }}}|d	k(  r||||f}	n|||||f}	 j                  |	      }
 j                  |	t        j                  
      }|
|fS )Nr\  c                  "    d j                    S )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: r9  r  s   r8   re   z*meta_adaptive_max_pool3d.<locals>.<lambda>2  r  r:   r3   r   c                  *    dj                    d  dS )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  r9  r  s   r8   re   z*meta_adaptive_max_pool3d.<locals>.<lambda>7  r  r:   r0   c                       y)NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r4   r4   r:   r8   re   z*meta_adaptive_max_pool3d.<locals>.<lambda>?  rm   r:   r.  r	  r   )r   rV   rg   r   r   r   r   r   )r   r{  r   dimDr  r  osizeTr  r  r   r   r   r   s   `           @r8   meta_adaptive_max_pool3dr  ,  s    ::D	LLZ 1d^ 
JJqMA	

 
LLKAU
 DEEqy

1	JJtE(FFFqyFFF3	E666:	
//)
$Cooiu{{o;G<r:   c                 P    t        | d       |j                  |j                        S )Nadaptive_max_pool3d_backward)r  r   r   )r  r   r   s      r8   !meta_adaptive_max_pool3d_backwardr  X  s"     &k3QR??5;;''r:   c                 >    |t        d      | j                  |      S )Nz:cannot repeat_interleave a meta tensor without output_size)r&  r   )repeatsr{  s     r8   meta_repeat_interleave_Tensorr  _  s%    WXX[))r:   c                 ~   | j                   j                  st        d| j                          |j                   j                  st        d|j                          t        | j	                  t        | j                               |j	                  t        |j                               t        j                        }|S )Nz!real must be floating point, got z!imag must be floating point, got rL   )r^   r   r   rT   r'  r   r   rP   )realimagr  s      r8   meta_complexr  f  s     ::''@MNN::''@MNN+DJJ78+DJJ786>>F
 Mr:   )
fill_valuer  c                   t        |       dv r1| j                  || j                         ft        j                        S t        j
                  || j                         fd|ft        j                  | j                        S )N)cpur   r   r3   r^   r   )r   r   r   rV   r   r(  r   )r   r   r  s      r8   nonzero_staticr  u  si     4N*~~tTXXZ0

~CC""488:I**;;	
 	
r:   c                 
   t        j                  t        j                  d        t        j                  | j                         | j                         fd| j                         ft         j                  | j                        S )Nc                       y)NaY  The register_meta function for torch.nonzero() raises unimplemented by default, as a correct data-independent implementation does not exist. This implementation returns a fake value, assuming all elements of the tensor are non-zero. To enable this registration, please set 'torch.fx.experimental._config.meta_nonzero_assume_all_nonzero' to True.r4   r4   r:   r8   re   znonzero.<locals>.<lambda>  rm   r:   r3   r  )	rV   _check_not_implementedrd  meta_nonzero_assume_all_nonzeror(  r   r   r   r   r   s    r8   nonzeror    sf     
  22	S 	txxz"	
DJJLjj{{	 r:   c           
          t        j                  t              d        g }t              D ]  \  ft        j                  j                  t         j
                  t         j                  t         j                  t         j                  fv d        j                  t         j                  t         j                  fv rȉj                         }t        |      t        j                  j                  z    j                  k   fd       t        j                        D ]`  t        j                  j                      j                  z      k(   fd       |j                  |j                  d             b ]|j                         p|j                          |t        j                  t               j                  k   fd       dd lm} t%         |j&                         t               j                  k  r*j                  d        t               j                  k  r*d}d}D ]  |dk(  rd}|dk(  rd	} n d
}|sg }g }t              D ]*  \  	|j                         |j                         , t              D ]*  \  	|j                         |j                         ,  j)                  |       |g g g t              D ]\  \  }	@rj                   j                  |	          )j                   j                  |	          Ht%        j                        ^ fd}
 j+                  z   z         }ddlm}  | j1                         dk(        r|S  |
       }t3        j4                  |      \  }}t%        |      t%        t        t        |                  k7  r~t3        j6                  |j                  |      }t3        j8                  |      }t3        j6                  |t3        j:                  |            }|j=                  |j?                         |      }|S )Nc                       y)Nz#at least one index must be providedr4   r4   r:   r8   re   z#meta_index_Tensor.<locals>.<lambda>  rm   r:   c                       y)Nz?tensors used as indices must be long, int, byte or bool tensorsr4   r4   r:   r8   re   z#meta_index_Tensor.<locals>.<lambda>  rm   r:   c                  "    d j                    S )N)too many indices for tensor of dimension r  r   s   r8   re   z#meta_index_Tensor.<locals>.<lambda>  s    G		{S r:   c            	      N    dj                    d  dj                    dz    S )NzThe shape of the mask 
 at index z0 does not match the shape of the indexed tensor r9  )r   r   jrH  r   s   r8   re   z#meta_index_Tensor.<locals>.<lambda>  s<    "8ZPQs SJJN**U_`ade`e_f!h r:   r3   c                  <    dj                    dt                dS )Nr  z (got ry   )r   r   )r   r   s   r8   re   z#meta_index_Tensor.<locals>.<lambda>  s!    ;DII;fSQX\NZ[\ r:   r   Fr   Tc                     z   z   }t        | j                               }dgt              z  |t              t        | j                        t              z
   | j	                  ||      S )zI
        This follows restride_src in TensorAdvancedIndexing.cpp
        r   )r   r   r   r   rD  )r   r   r   after_shapebefore_shapereplacement_shapes      r8   _restride_srcz(meta_index_Tensor.<locals>._restride_src  so     00;>t{{}%KL#PSQ
 K
L!C

Oc+6F$FG ug..r:   guard_or_false) rV   rg   r  	enumerater^   r   r   rB  r  r   r   r   r   r   r   selecttorch._refs_refsr   r&   r   r   r$  r  r   rO   3compute_elementwise_output_logical_to_physical_perm
apply_permr   invert_permrD  r   )r   r   r  r  refsstatehas_contiguous_subspacer  transposed_indicesr   r  r   r  restrided_selfpermrR   
perm_shaper3  r  r  r   r   r  rH  r  s   ``                @@@@@@@r8   meta_index_Tensorr    s   	LLg MN #%Fg& !5LL

EIIuzz5::NNY {{uzz5::66--/K""

Ndii/S uzz* 8A&&A$**QU*;;h
 MM'..A"678 e$MM% /!0 G	LLG		!\
 (4(('23G
g,
"t g,
" E# 'A: aZ} ' #'
 #!'* 	1HAu A"))%0	1 "'* 	1HAu}A"))%0	1 ||D!$ !LK#%( 2
U= ""4::c?3##DJJsO4 $U[[ 12
/ ..(99KG
HCDdjjla'(

 #4(NGGWGD! DzT%D	*++%%cii6
66zB
%%j%2C2CD2IJ
nnSXXZ4Jr:   c                 >   d }d }d }d }|
d   r8 || |      }| j                  |j                               j                  |      }|
d   r8 |||       }| j                  |j                               j                  |      }|
d   r| j                  |      }|||fS )Nc                 &   t        |       }t        |      }|t        j                  k(  s|t        j                  k(  rt        j                  S |t        j                  k(  s|t        j                  k(  rt        j                  S t        j                  S rD   )r   rV   r  r  r   )t1t2fmt1fmt2s       r8   _conv_memory_formatz6meta_convolution_backward.<locals>._conv_memory_format9  sr     %R($R(5&&&$%2E2E*E&&&5)))TU5K5K-K)))&&&r:   r   r   r3   r   )r   r   r'  )grad_output_input_weight_bias_sizes_optr   r  r  
transposedr  r  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biasr  r   s                   r8   meta_convolution_backwardr    s      	' 1~+L'B)33FKKMBEE' F 
 1~+FLA*44W\\^DGG' H 
 1~(22>B 35FGGr:   c                   j                  d      }j                  d      }| j                  ||f      } t        j                  j	                         dk(  d        t        j                  j	                         dk(  d        t        j                  j                  d      j                  d      k(  fd       t        j                  j                  d      j                  d      k(  fd       t        j                  | j                  d      |k(  xr | j                  d      |k(  d	        | j                  | j                               S )
Nr3   r   r0   c                       yrV  r4   r4   r:   r8   re   zmeta_addbmm.<locals>.<lambda>Z  rm   r:   c                       yrX  r4   r4   r:   r8   re   zmeta_addbmm.<locals>.<lambda>[  rm   r:   r   c                  P    d j                  d       dj                  d       S )Nz8batch1 and batch2 must have same number of batches, got r   r   r   rZ  r[  s   r8   re   zmeta_addbmm.<locals>.<lambda>^  s.    J6;;WX>JZZ_`f`k`klm`n_op r:   c            
          d j                  d       d j                  d       dj                  d       dj                  d       d	S )Nz#Incompatible matrix sizes for bmm (r3   r=   r   r   ry   r   r  s   r8   re   zmeta_addbmm.<locals>.<lambda>b  sQ    1&++a.1A6;;q>BR S;;q>"!FKKN#316 r:   c                       y)Nz.self tensor does not match matmul output shaper4   r4   r:   r8   re   zmeta_addbmm.<locals>.<lambda>i  rm   r:   )r   r  rV   rg   r   r   )r   rZ  r[  rV  rU  rf  rg  s    ``    r8   meta_addbmmr  T  s    ;;q>D;;q>D;;d|$D	LL"$HI	LL"$HI	LLA&++a.(p 
LLA&++a.(	
 
LL		!51!5@ >>$))+&&r:   c                 @    | j                  | j                               S rD   r   r   )r   r  kwargss      r8   meta_randint_liker  n  s    >>$))+&&r:   )
grad_scale	found_infc       	         n    | |||||fD ])  t        j                  t        t              fd       + y )Nc                       dt                S Nz'exponent must be a tensor list but got rz   ls   r8   re   z#meta__fused_adam_.<locals>.<lambda>      =d1gYG r:   rV   rg   rp   r   )r   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizer  r  r  s                  @r8   meta__fused_adam_r  s  s:    & E8[/;O 
q$G	

r:   c       	             | |||||fD ])  t        j                  t        t              fd       + d } ||        ||       ||       ||       ||      fS )Nc                       dt                S r  r  r  s   r8   re   z"meta__fused_adam.<locals>.<lambda>  r  r:   c                 R    | D cg c]  }t        j                  |       c}S c c}w rD   r  )tensor_listr  s     r8   empty_like_listz)meta__fused_adam.<locals>.empty_like_list  s!    -89  #999s   $r  )r   r  r  r	  r
  r  r  r  r  r  r  r  r  r  r  r  r  s                   @r8   meta__fused_adamr    ss    & E8[/;O 
q$G	

: 	!$( r:   c                 j    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  u  fd       t        j                  j                  t         j                  u fd       t        j                   j                  d      j                  d      k(   fd        j                   j                  d      j                  d      ft         j                  	      S )
Nr   c                       y)Nza must be a 2D tensorr4   r4   r:   r8   re   zmeta__int_mm.<locals>.<lambda>  rm   r:   c                       y)Nzb must be a 2D tensorr4   r4   r:   r8   re   zmeta__int_mm.<locals>.<lambda>  rm   r:   c                  "    d j                    S )Nzexpected self to be int8, got r   )r6   s   r8   re   zmeta__int_mm.<locals>.<lambda>      0	: r:   c                  "    d j                    S )Nzexpected mat2 to be int8, got r   )r7   s   r8   re   zmeta__int_mm.<locals>.<lambda>  r  r:   r3   r   c            
          d j                  d       d j                  d       dj                  d       dj                  d       d	S )Nz'Incompatible matrix sizes for _int_mm (r   r=   r3   r   ry   r   r5   s   r8   re   zmeta__int_mm.<locals>.<lambda>  sM    5affQi[!&&) M66!9+Qqvvayk, r:   r   )rV   rg   r   r^   rB  r   r   rC  r5   s   ``r8   meta__int_mmr!    s     
LLA>?	LLA>?	LL	5::: 
LL	5::: 
LL	q	QVVAY	
 ;;q	166!9-U[[;AAr:   c                 f    t        j                   j                         dk(  d        t        j                   j                  t         j                  u  fd        j                  d      } j                  d      dz  } j                  |dz  ||dz  z  d|dz  ft         j                  	      S )
Nr   c                       yNzw must be a 2D tensorr4   r4   r:   r8   re   z2meta__convert_weight_to_int4pack.<locals>.<lambda>  rm   r:   c                  "    d j                    S r  r   r  s   r8   re   z2meta__convert_weight_to_int4pack.<locals>.<lambda>      .qwwi8 r:   r   r3             r   )rV   rg   r   r^   r  r   r   rC  r  inner_k_tilesr  rH  s   `   r8    meta__convert_weight_to_int4packr,    s    	LLA>?	LL	5;;8 	
q	A	q	AA;;F-"$%Q		
 kk   r:   c                 J    t        j                   j                         dk(  d        t        j                   j                  t         j                  u  fd        j                  d      } j                  d      } j                  ||dz  ft         j                        S )Nr   c                       yr$  r4   r4   r:   r8   re   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  rm   r:   c                  "    d j                    S Nzexpected w to be int32, got r   r  s   r8   re   z:meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  r&  r:   r   r3   r   )rV   rg   r   r^   rC  r   r   r  r*  s   `   r8   (meta__convert_weight_to_int4pack_for_cpur1    s    	LLA>?	LL	5;;8 	
q	A	q	A;;	
AFkk   r:   c                     t        j                   j                         dk(  d        j                  j                  dk(  rdndt        j                  j                         k(  fd       t        j                   j
                  t         j                  t         j                  t         j                  fv  fd       t        j                  j
                  t         j                  u fd       j                  j                  dk(  rj                  d      nj                  d      d	z  } j                   j                  d      | j
                  
      S )Nr   c                       yNzx must be a 2D tensorr4   r4   r:   r8   re   z*meta__weight_int4pack_mm.<locals>.<lambda>  rm   r:   r   r	  c                      d  dS )Nzw must be a zD tensorr4   )expected_dims   r8   re   z*meta__weight_int4pack_mm.<locals>.<lambda>  s    Lh2W r:   c                  "    d j                    S r  r   r  s   r8   re   z*meta__weight_int4pack_mm.<locals>.<lambda>      5aggY? r:   c                  "    d j                    S r0  r   r  s   r8   re   z*meta__weight_int4pack_mm.<locals>.<lambda>  r&  r:   r   r'  r   )rV   rg   r   r  rz   r^   r`  ra  rb  rC  r   r   )r=   r  r  r  dim_nr6  s   ``   @r8   meta__weight_int4pack_mmr;    s    	LLA>?**e31L	LLL(*WX	LL	EMM5==%..AA? 
LL	5;;8 ++u4AFF1I!&&)a-E;;qvvay%qww;77r:   c                 (    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr   c                       yr4  r4   r4   r:   r8   re   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rm   r:   c                       yr$  r4   r4   r:   r8   re   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>   rm   r:   c                  "    d j                    S r  r   r  s   r8   re   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  r8  r:   c                  "    d j                    S r  r   r  s   r8   re   z2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  r&  r:   r   r   )
rV   rg   r   r^   r`  ra  rb  r  r   r   r  s   ``  r8    meta__weight_int4pack_mm_for_cpurA        	LLA>?	LLA>?	LL	EMM5==%..AA? 
LL	5;;8 ;;qvvay!&&)177;;;r:   c                 (    t        j                   j                         dk(  d        t        j                  j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr   c                       yr4  r4   r4   r:   r8   re   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rm   r:   c                       yr$  r4   r4   r:   r8   re   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rm   r:   c                  "    d j                    S r  r   r  s   r8   re   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  r8  r:   c                  "    d j                    S r0  r   r  s   r8   re   z;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  r&  r:   r   r   )
rV   rg   r   r^   r`  ra  rb  rC  r   r   )r=   r  r  qScaleqZeross   ``   r8   )_weight_int4pack_mm_with_scales_and_zerosrJ    rB  r:   r6   r7   c                     | |z   dz
  |z  |z  S r2   r4   r5   s     r8   kai_rounduprL    s    UQY1!!r:   c                   	
 | dk(  ry||k(  r(d}d}d}dddd fdfd} ||||||      S |dz  d	k(  rC||z  d	k(  r:d}d}d}dddd		fd
}	
fdd 
	fd	fd |||||||      S y y y )Nr	  r'  r(  r   c                 8    t        ||z  d      }t        | |      S )Nr	  rL  )rH  krsrkr_sr_roundedup4s       r8   kai_k_roundedupz3get_kai_packed_weight_size.<locals>.kai_k_roundedup+  s#     $/rBw#: "1&677r:   c                 f     | ||      }|dz  dk7  rt        d|       ||dz  z   z   z   z  S )Nr   r   zk_internal must be even, got r  )	rH  nrrP  rQ  
k_internalrS  kai_num_bytes_biaskai_num_bytes_multiplier_rhskai_num_bytes_sum_rhss	        r8   9kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s01  s`     -QB7
Nq((+H)UVV1_23+, )) r:   c                 >    t        | |      |z  }| ||||      z  S rD   rO  )r  rH  rU  rP  rQ  num_rowsrZ  s         r8   7kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0z[get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0@  s6     'q"-3 O2r2r:   r)  r   c                     ||z  dk7  rt        d| d| d      |	z  dk7  rt        d| d	 d      |z  dk7  rt        d| d d      t        | |      |z  }| |||||      z  S Nr   bl (z) must be divisible by kr (ry   znr (z+) must be divisible by kai_nr_multiple_of (+) must be divisible by kai_bl_multiple_of (r   rL  )
r  rH  rU  rP  rQ  blr\  kai_bl_multiple_of;kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0kai_nr_multiple_ofs
          r8   9kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0z]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0X  s     G>(4t3NrdRS)TUU++1(rd"MN`Maabc  ++1(rd"MN`Maabc  'q"-3 Q2r2rr:   c                     ||z  dk7  rt        d| d| d      |
z  dk7  rt        d| d
 d      |z  dk7  rt        d| d d       	       } | |      } ||      }|||z  z   z   z  S r_  r  )rH  rU  rP  rQ  rc  num_bytes_multiplier_rhsnum_blocks_per_rownum_bytes_per_blockrd  #kai_get_bf16_datatype_size_in_bytesrf  kai_num_blocks_per_rowrW  kai_num_bytes_per_blockrY  s           r8   re  z_get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0o  s     G>(4t3NrdRS)TUU++1(rd"MN`Maabc  ++1(rd"MN`Maabc 
 ,O+P(%;Ar%B"&=0'# (+==+,() r:   c                       y)Nr   r4   r4   r:   r8   rl  zGget_kai_packed_weight_size.<locals>.kai_get_bf16_datatype_size_in_bytes  s    r:   c                 V    |z  dk7  rt        d| d d      t        | |      |z  S )Nr   r`  ra  ry   rb  )rH  rc  rd  s     r8   rm  z:get_kai_packed_weight_size.<locals>.kai_num_blocks_per_row  sH    ++1(rd"MN`Maabc  #1b)R//r:   c                 H    | z  dk7  rt        d|  d d      | dz  |z   S )Nr   r`  ra  ry   r   r  )rc  ri  rd  s     r8   rn  z;get_kai_packed_weight_size.<locals>.kai_num_bytes_per_block  sF    ++1(rd"MN`Maabc  a#;;;r:   r4   )n_bitsr  K	groupsizekai_nrkai_krkai_srr]  rg  rd  rl  re  rZ  rS  rf  rm  rW  rX  rn  rY  s            @@@@@@@@@@@r8   get_kai_packed_weight_sizerx    s    {>FFF$%!+,(!"8
 K1fff  ^q Q]a%7FFF$%!!"!"!#. :0< M1fffi ] &8 _ r:   c                     t        j                   j                  t         j                  u  fd       t         j                  j
                  j                         r||k(  r|j                  t         j                  k(  s2||k  re|dz  dk(  r]||z  dk(  rU|j                  t         j                  k(  r8t        d|||      } j                  t        |      t         j                        S  j                         |j                         z   }|||j                         z  } j                  |t         j                        S )Nc                  "    d j                    S r  r   )weightss   r8   re   z2meta__dyn_quant_pack_4bit_weight.<locals>.<lambda>  s    .w}}o> r:   r)  r   r	  r   )rV   rg   r^   r  backendskleidiaiis_availablerZ   rb  rx  r   r   r   )r{  scales_zerosr;  
block_sizein_featuresout_featurespacked_weight_sizes   `      r8    meta__dyn_quant_pack_4bit_weightr    s    
LL$> ~~++-	{	"|'9'9U[['H$R1$j(A-""enn4 8|[*
   %7!8 LL <+=+=+??djjl*/u{{CCr:   c                 n    t        j                   j                         dk(  d        t        j                   j                  t         j                  k(  xs$  j                  t         j
                  k(  xr k(   fd        j                  d      } j                  || j                        S )Nr   c                       y)Nzinput must be a 2D tensorr4   r4   r:   r8   re   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>  rm   r:   c                  .    dj                    d  d S )NzPexpected input to be f32 or bf16 (bf16 requires block_size == in_features), got z with block_size=z and in_features=r   )r  r  inps   r8   re   z-meta__dyn_quant_matmul_4bit.<locals>.<lambda>  s*    99+.zl:KK=Z r:   r   r   )rV   rg   r   r^   r`  rb  r   r   )r  packed_weightsr  r  r  r  s   ` ``  r8   meta__dyn_quant_matmul_4bitr    s     
LLa!DE	LL	emm	# 	GII'EJ+,E	
 	A==L		=::r:   c                 (    t        j                   j                         dk(  d        t        j                   j                  t         j                  t         j
                  t         j                  fv  fd       t        j                  j                         dk(  d        t        j                  j                  t         j                  u fd        j                   j                  d      j                  d       j                        S )Nr   c                       yr4  r4   r4   r:   r8   re   z*meta__weight_int8pack_mm.<locals>.<lambda>  rm   r:   c                  "    d j                    S r  r   r  s   r8   re   z*meta__weight_int8pack_mm.<locals>.<lambda>  r8  r:   c                       yr$  r4   r4   r:   r8   re   z*meta__weight_int8pack_mm.<locals>.<lambda>  rm   r:   c                  "    d j                    S )Nzexpected w to be int8, got r   r  s   r8   re   z*meta__weight_int8pack_mm.<locals>.<lambda>  s    -aggY7 r:   r   r   )
rV   rg   r   r^   r`  ra  rb  rB  r   r   )r=   r  q_scaless   `` r8   meta__weight_int8pack_mmr    s    	LLA>?	LL	EMM5==%..AA? 
LLA>?	LL	5::7 ;;qvvay!&&)177;;;r:   c                 n    t        j                   j                         dk\   fd       t        j                  j                         dk\  fd       t        j                   j                  d      j                  d      k(   fd       t        j                  t	        j
                   j                         fd       t        j                  t	        j
                  j                        fd       t        j                  |dk\  d	        t        j                  d
v fd        j                  d      }j                  d      } j                  d d }j                  d d }t        t        j                  ||            }|j                  ||g        j                  |      S )Nr   c                  ,    d j                          dS )Nz1cdist only supports at least 2D tensors, X1 got: rM  r   x1s   r8   re   z$meta_cdist_forward.<locals>.<lambda>      CBFFH:QO r:   c                  ,    d j                          dS )Nz1cdist only supports at least 2D tensors, X2 got: rM  r   x2s   r8   re   z$meta_cdist_forward.<locals>.<lambda>  r  r:   r   c                  P    d j                  d       dj                  d       S )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   )r  r  s   r8   re   z$meta_cdist_forward.<locals>.<lambda>  s,    Frwwr{mSXY[Y`Y`acYdXef r:   c                  "    d j                    S )Nz3cdist only supports floating-point dtypes, X1 got: r   r  s   r8   re   z$meta_cdist_forward.<locals>.<lambda>      EbhhZP r:   c                  "    d j                    S )Nz3cdist only supports floating-point dtypes, X2 got: r   r  s   r8   re   z$meta_cdist_forward.<locals>.<lambda>  r  r:   r   c                       y)Nz)cdist only supports non-negative p valuesr4   r4   r:   r8   re   z$meta_cdist_forward.<locals>.<lambda>  rm   r:   )Nr   r3   r   c                      d  S )Nz(possible modes: None, 0, 1, 2, but was: r4   )compute_modes   r8   re   z$meta_cdist_forward.<locals>.<lambda>  s    :<.I r:   r  )rV   rg   r   r   rO   is_float_dtyper^   r   r   broadcast_shapesextendr   )	r  r  rH  r  r1r2batch_tensor1batch_tensor2rg  s	   `` `     r8   meta_cdist_forwardr    sJ   	LL
AO 
LL
AO 
LL
rwwr{"f 
LLRXX&P 
LLRXX&P 
LLaLM	LL'I 
B	BHHSbMMHHSbMM..}mLMLR!<<%%r:   c                 4   |j                   d   }|j                   d   }|j                   d   }|j                   d d }|j                   d d }	t        t        j                  ||	            }
|
j	                         }|j                  ||g       t        j                  |
      }|dk(  s|dk(  s
|dk(  s|dk(  rt        j                  |      S |t        |j                         k7  r|j                  |      }t        j                  |t        j                        S )Nr   r  r   r   )r   r   rV   r  copyr  mathprod
zeros_liker  r   r   )rL  r  r  rH  cdistc1r  r  r  r  r  tensor1_expand_sizebatch_products                r8   meta_cdist_backwardr    s     
"B	"B	"BHHSbMMHHSbMM 6 6}m TU.335Bx(II23M	Qw"'R1W(:##d288n,YY*+Be.E.EFFr:   c	                     t        j                  j                  t         j                  t         j                  fv fd       t        j                  j                  t         j                  t         j                  fv fd       t        j                  t        j                   j                         fd       j                  d      }	|rt        j                  |	dk\  d        |	dz  }	 j                  |	 j                  d            }
}t        j                  |t        k(  d        t        j                  j                  dk(  fd       t        j                  j                         j                         k(  fd	       fd
d fd}t              dk7  r|j                  j                  d            }j                  j                               }|t        k(  r"j                  |	 j                  d            }nj                  d      }n | |
|      }|t        t        fv s|s!j                  j                  d            }nj                  d      }j                  |	      }j                  d   }|t        k(  rA|rt        j                  |dk\  d        |dz  }j                  | j                  d         }nj                  |j                               }|
|||fS )Nc                  "    d j                    S )Nz(expected indices to be long or int, got r   )r   s   r8   re   z$meta_embedding_bag.<locals>.<lambda>*      :7==/J r:   c                  "    d j                    S )Nz(expected offsets to be long or int, got r   )rw  s   r8   re   z$meta_embedding_bag.<locals>.<lambda>.  r  r:   c                  "    d j                    S )Nz/expected weight to be floating point type, got r   )r9  s   r8   re   z$meta_embedding_bag.<locals>.<lambda>2  s    A&,,P r:   r   r3   c                       yNz1include_last_offset: numBags should be at least 1r4   r4   r:   r8   re   z$meta_embedding_bag.<locals>.<lambda>9  rm   r:   c                       y)Nz@embedding_bag: per_sample_weights only supported with mode='sum'r4   r4   r:   r8   re   z$meta_embedding_bag.<locals>.<lambda>B  rm   r:   c                  $    d j                    dS )Nz1expected per_sample_weights to be 1D tensor, got rM  r  )per_sample_weightss   r8   re   z$meta_embedding_bag.<locals>.<lambda>F  s    GHZH_H_G``ab r:   c                  N    dj                          d j                          dS )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (ry   r   )r   r  s   r8   re   z$meta_embedding_bag.<locals>.<lambda>J  s4    78J8P8P8R7S T66=mmo5FaI r:   c                 D     | ||      xr |j                  d      dk(  S Nr   r3   r   )r*  r}  r   padding_idxis_fast_path_index_selects       r8   is_fast_path_index_select_scalez;meta_embedding_bag.<locals>.is_fast_path_index_select_scaleP  s(    %c6;?XELLQROWXDX	
r:   c                     | j                   t        j                  k(  xs | j                   t        j                  k(  xr1 | j	                  d      dk(  xr |j	                  d      dk(  xr |dk  S r  )r^   rV   rZ   rX   r   )r*  r   r  s      r8   r  z5meta_embedding_bag.<locals>.is_fast_path_index_selectU  sb    YY%++%@ejj)@  

1" a A%  a		
r:   c                 2    | | |||      S  | ||      S rD   r4   )r*  r}  r   r  r  r  s       r8   is_fast_pathz(meta_embedding_bag.<locals>.is_fast_path]  s)    23v{SS,S&+FFr:   r  c                       yr  r4   r4   r:   r8   re   z$meta_embedding_bag.<locals>.<lambda>w  rm   r:   )rV   rg   r^   r   r   rO   r  r   r   MODE_SUMr   r   r   MODE_MAX	MODE_MEANr   )r9  r   rw  scale_grad_by_freqr]  sparser  include_last_offsetr  num_bagsr   r  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr  r  s   ```   `          @@r8   meta_embedding_bagr    s}    
LL%**eii00J 
LL%**eii00J 
LLV\\*P
 ||AHMG	
 	AhA7F%HV	
 	##q(b	
 	$$&'--/9	



G 7u$&&w||A7
$$W\\^48!++Hfkk!nEK!++A.K$V-?UIx(( **7<<?;J **1-J$$X.--"8"qLO 1!++GV\\!_EK!++HMMO<K:x44r:   c                     t        | ||g| \  }}}}t        |      dk(  r|j                  |j                               }||||fS )Nr  )r  r   r   r   )r9  r   rw  rQ   r   r  r  r  s           r8   meta_embedding_bag_forward_onlyr    sX    0B1#'1-FJ+ 7u$$$W\\^4:x44r:   c                     |r|S | j                   j                  s| j                   j                  r| j                   S |rt        j                  S | j                   S rD   )r^   r   r   rV   r   )r   r^   promote_int_to_longs      r8   _get_reduction_dtyper    sD    {{$$(>(>{{	zz;;r:   r   c                    t        | |d      }t        j                  | j                  |      }t	        | ||      }| j                  ||      S )NT)r  r   )r  rO   r  r   r  r   )r   r  r  r^   r  rg  s         r8   meta_nansumr    sI     (u$OLT2D+E4AL??<|?<<r:   c           	          t        j                  | j                  t        t	        | j                                           }| j                  |      S rD   )rO   r  r   rf   r   r   r   )r   rg  s     r8   meta_medianr    s<    77U5-.L ??<((r:   c                    t        |       dk(  rt        j                  d       t        j                  | j                  |f      }t        | ||      }| j                  |      | j                  |t        j                        fS )Nr   zmedian CUDA with indices outputr   )	r   rO   alert_not_deterministicr  r   r  r   rV   r   )r   r   r  rg  s       r8   meta_median_mode_dimr    sp     5V#%%&GH


u{{SF
3C+E3@L%EJJ7 r:   c                     | S rD   r4   r   s    r8   meta_logical_not_r    rq  r:   c                    t        j                  t        |      | j                         k\  d        t	        |      D ]"  \  t        j                  dk\  fd       $ t        |      | j                         z
  }d|z  t        | j                        z   }t        t        |            D cg c]  }||   ||   z   }}| j                  |      S c c}w )Nc                       y)NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr4   r4   r:   r8   re   zmeta_repeat.<locals>.<lambda>  rm   r:   r   c                      d d  S )Nz"Repeats cannot be negative, found r  r4   )r   reps   r8   re   zmeta_repeat.<locals>.<lambda>  s    8ZsK r:   r  )	rV   rg   r   r   r  rf   r   r   r   )r   r  num_new_dimensionspadded_sizer   target_sizer  s       ` @r8   meta_repeatr    s    	LLG
"l G$ 
31HK	

 W
2++eDJJ.??K8=c'l8KL1;q>GAJ.LKL>>+&& Ms   1Cc                     | S rD   r4   r   s    r8   
meta_zero_r    rq  r:   c                 z    t        |t        j                        r t        | j                  |j                         | S rD   )rp   rV   r   ri   r   r   r   s     r8   meta_binop_inplacer    s)     %&

EKK8Kr:   c                     d }d }d } ||       r ||      rt        d       ||       r ||      st        d      t        |t        j                        r t	        | j
                  |j
                         | S )a*  
    Some checks for inplace ops.
    Checks for promotion rules for some dtypes.
    int.add/sub_(float) and bool.add/sub_(others) are rejected.
    Promoting in these in-place operations would require reallocating
    and copying over elements, hence not allowed.
    Checks for alpha param.
    c                     t        | t              rt        j                  | j                        S t        | t
              S rD   )rp   r   rO   r>  r^   r   rs   s    r8   is_integericz.meta_binop_inplace_alpha.<locals>.is_integeric  s.    c:&))#))44c7++r:   c                     t        | t              rt        j                  | j                        S t        | t
              S rD   )rp   r   rO   r  r^   r   r  s    r8   
is_floaticz,meta_binop_inplace_alpha.<locals>.is_floatic  s.    c:&''		22c9--r:   c                     t        | t              rt        j                  | j                        S t        | t
              S rD   )rp   r   rO   is_boolean_dtyper^   r   r  s    r8   is_booleanicz.meta_binop_inplace_alpha.<locals>.is_booleanic  s.    c:&))#))44c8,,r:   z]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)r&  rp   rV   r   ri   r   )r   r   rU  r  r  r  s         r8   meta_binop_inplace_alphar    sz    $,.- Dj/k
 	

 D,u"5m
 	
 %&

EKK8Kr:   c                 :    t        | |t        j                        S Nr  rT   r   rP   r   r   rU  s      r8   meta_binop_alphar  "  s     e$C$K$K r:   c                 8    t        | t        j                        S r  r  )r   r  s     r8   
meta_roundr  .  s    <DD r:   c                 l    t        j                  t        j                  j                         fd       t        t         j                        r8t        j                  t        j                  j                         fd       y t        j                  t        t               fd       y )Nc                  &      dj                    S )Nz7: Expected input tensor to have an integral dtype. Got r   )r  r   s   r8   re   z#shift_dtype_check.<locals>.<lambda>8  s    7)RSWS]S]R^_ r:   c                  &      dj                    S )Nz6: Expected shift value to have an integral dtype. Got r   r  r  s   r8   re   z#shift_dtype_check.<locals>.<lambda>=  s    wiUVYV_V_U`a r:   c                        d S )Nz): Expected shift value to be an int. Got r4   r  s   r8   re   z#shift_dtype_check.<locals>.<lambda>B  s    wiHN r:   )rV   rg   rO   r>  r^   rp   r   r   )r  r   r  s   ```r8   shift_dtype_checkr  5  sp    	LLtzz*_ #u||$""399-a	

 	sG$N	
r:   c                 T    t        d| |       t        | |t        j                        S )Nrshiftr  r  rT   r   rP   r  s     r8   meta_rshiftsr  F  )    he,e$C$K$K r:   c                 T    t        d| |       t        | |t        j                        S )Nlshiftr  r
  r  s     r8   meta_lshiftsr  N  r  r:   c                 8    | j                  | j                        S rD   r  r   s    r8   	meta_zeror  V  s    >>$**%%r:   c                     | S rD   r4   r   r  s     r8   
meta_fill_r  [  rq  r:   c                 ,    t        j                  |       S rD   r  r  s     r8   	meta_fillr  `      D!!r:   c                     | S rD   r4   r   s    r8   
meta_relu_r  e  rq  r:   c                 :    t        | |t        j                        S r  r  r  s      r8   meta__add_relur  j  s     e$C$K$K r:   c                 ,    t        j                  |       S rD   r  r   noiselowerr  r  r  s         r8   meta_rrelu_with_noiser   r  s    
 D!!r:   c                 V    t        j                  |       t        j                  |      fS rD   r  r  s         r8    meta_rrelu_with_noise_functionalr"  z  s%     D!5#3#3E#:::r:   c                     | S rD   r4   )r   r  r  r  r  s        r8   meta_rrelu_with_noise_r$    s	     Kr:   c                 ,    t        j                  |       S rD   r  r   r   r   
accumulates       r8   meta_index_putr(    r  r:   c                 F    t        | j                  |j                         | S rD   ri   r   )r   r  values      r8   meta_masked_fill_r,    s    DJJ

3Kr:   c                     | j                  | j                               j                  t        j                  |             }|S r   )r   r   r'  rO   r   )r   r  r}  masked_scales       r8   meta__masked_scaler/    s<    >>$))+.1111$7 2 L r:   c                      t        j                  |j                  t         j                  t         j                  fv d        t        j                   j                  j                  k(   fd        S )Nc                       y)NzMask must be bool or uint8r4   r4   r:   r8   re   z&meta_masked_scatter_.<locals>.<lambda>  rm   r:   c                  <    d j                    dj                    S )NzEmasked_scatter: expected self and source to have same dtypes but got r   r   )r   rj  s   r8   re   z&meta_masked_scatter_.<locals>.<lambda>  s"     **U6<<.: r:   )rV   rg   r^   r  r  )r   r  rj  s   ` `r8   meta_masked_scatter_r3    sU    	LL

uzz5;;//1U 
LL

fll"	:
 Kr:   c                     t        | |      \  } }t        j                  | t        j                        }t	        |||      S r   )r&   rV   r   r   r3  )r   r  rj  r   s       r8   meta_masked_scatterr5    s;     "$-JD$d%2I2IJFf55r:   c                 $    | j                  |      S rD   r  )r   r  rG  s      r8   meta_masked_scatter_backwardr7    s    >>%  r:   c                     | S rD   r4   r&  s       r8   meta_index_put_r9    rq  r:   c           	         ddl m}m} t        j                  | j                         dk(  d        t        j                  |j                         dk(  d        | j                         }|j                         |d   |d   |d   }d   }	||	ft        j                   | |d          |d               fd       |r| j                  t        j                  k(  xs | j                  t        j                  k(  xr |t        j                  k(  }
t        j                  || j                  k(  xs |
d	        |j                        j                  |      }n|j                        }|sYWt        j                  j                         dk(  d
        t        j                   |j                               fd       |S )Nr   )sym_andrT  r0   c                       yrV  r4   r4   r:   r8   re   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rm   r:   c                       yrX  r4   r4   r:   r8   re   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rm   r:   r   r3   c            	      .    d d d d    d d    d	S r]  r4   r_  s   r8   re   z)common_meta_baddbmm_bmm.<locals>.<lambda>  s5    RSURV
l<?*;2l1o=NbR r:   c                       y)Nzfout_dtype only supported for torch.float32 output with float16/bfloat16 inputs or same as input dtypesr4   r4   r:   r8   re   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rm   r:   c                       y)Nzself must be a 3D tensorr4   r4   r:   r8   re   z)common_meta_baddbmm_bmm.<locals>.<lambda>  rm   r:   c                  0    d  dj                          S )Nz*Expected an input tensor shape with shape z but got shape: r   )r{  self_baddbmms   r8   re   z)common_meta_baddbmm_bmm.<locals>.<lambda>  s!    @M]^j^o^o^q]rs r:   )r$  r;  rT  rV   rg   r   r   r^   ra  rb  r`  r   r'  )rZ  r[  is_bmmrB  r=  r;  rT  ri  res_rowsres_colssupported_out_dtyper   r`  ra  rb  r{  s      `        @@@@r8   common_meta_baddbmm_bmmrG    s   E	LL"$HI	LL"$HI;;=L;;=L	aB#AAHAHx*K	LL|A+VLOEU-VW	R
 LLEMM)KV\\U^^-K)5==( 	 	%<)<|	
 !!+.11)< !!+.l.\%%'1,.PQ<$$&4s	

 Mr:   c                     t        | |d      S )NTrG  )r   rJ  s     r8   meta_bmmrJ    s    "4t44r:   c                      t        | |d|      S )NT)r=  rI  )r   rJ  r=  s      r8   meta_bmm_dtyperL    s    "4tyIIr:   c                 h    | |z  }| |z  }|dk7  r"t        |dk        t        |dk        k7  r|dz  }|S r  )r  )r=   r>   qra  s       r8   div_rtnrO    sB    	QA	AA 	Av4A;$q1u+-	QHr:   c                     t        | |z   |z   ||dz
  z  z
  dz
  |r|dz
  ndz   |      dz   }|r|dz
  |z  | |z   k\  r|dz  }|S r  )rO  )	inputSize
kernelSizer  r  r   r  r  
outputSizes           r8   pooling_output_shape_pad_lrrT    s     	 *q.)* 	
 'vzA/ 	
 		  Nf$	E(99!OJr:   c           	          t        j                  |dk7  d        t        j                  dk\  fd       t        j                  dz
  z  dz   dz  k  fd       t        | ||      S )Nr   c                       y)Nzstride should not be zeror4   r4   r:   r8   re   z&pooling_output_shape.<locals>.<lambda>  rm   r:   c                      d  S )Nz'pad must be non-negative, but got pad: r4   pads   r8   re   z&pooling_output_shape.<locals>.<lambda>  s    %LSE#R r:   r3   r   c                      d d d  S )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r4   )r  rR  rY  s   r8   re   z&pooling_output_shape.<locals>.<lambda>  s'    OPSu U%,nXJ@ r:   )rV   rg   rT  )rQ  rR  rY  r   r  r  s    `` ` r8   r9  r9    ss    	LL1AB	LLRS	LLa8+a/A55	
 ':sC9 r:   c           	      \   	
  j                         }	t        j                  dkD  xr dkD  fd       t        j                  dkD  xr dkD  fd       t        j                  dkD  xr dkD  fd        j                  d      dk7  xr  j                  d      dk7  }|t        j                  k(  r7t        j                  |dk(  xr |xr  j                  d      dk7   fd	       nWt        j                  |dk(  xr  j                  d      dk7  xr |xs |dk(  xr |xr  j                  d      dk7   fd
       t        j                  dz  k\  xr dz  k\  fd       t        j                  dk\  xr dk\  
	fd       y )Nr   c                      d  d S )Nz5kernel size should be greater than zero, but got kH: , kW: r4   )r=  r>  s   r8   re   z$pool2d_shape_check.<locals>.<lambda>9  s    Gt6RTQUV r:   c                      d  d S )Nz0stride should be greater than zero, but got dH: , dW: r4   )r?  r@  s   r8   re   z$pool2d_shape_check.<locals>.<lambda>=  s    B2$fRDQ r:   c                      d  d S )Nz9dilation should be greater than zero, but got dilationH: , dilationW: r4   )	dilationH	dilationWs   r8   re   z$pool2d_shape_check.<locals>.<lambda>A  s    KI;Vcdmcno r:   r3   r   r	  r0   c                  *    d j                          S )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: r   r  s   r8   re   z$pool2d_shape_check.<locals>.<lambda>I  s     CCH::<.R r:   c                  *    d j                          S )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r  s   r8   re   z$pool2d_shape_check.<locals>.<lambda>P  s    opupzpzp|o}~ r:   c                       d d d d  S )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r4   )r=  r>  rA  rB  s   r8   re   z$pool2d_shape_check.<locals>.<lambda>U  s&     ygbT> r:   c                  .    d d  d d d d dS NzGiven input size: (r=   z). Calculated output size: (z). Output size is too smallr4   )rC  rD  r  rI  r	  r
  s   r8   re   z$pool2d_shape_check.<locals>.<lambda>[  s:    %k]!K=* N$$0><.+ O## r:   )r   rV   rg   r   r  )r   r=  r>  r?  r@  rA  rB  rb  rc  r  rC  rD  r	  r
  r   r   
valid_dimsrI  s   ``````````````   @r8   r:  r:  #  s   " 99;DL	LL
Q26V 
LL
Q26Q 
LLA')a-o
 A!#:

1(:J+++AI;*;A!);R	
 	QY<5::a=A-<* A	?j?UZZ]a-?~	
 
LL
a4+B!GtO	> 
LLq.\Q.	# 	#r:   re  rb  r=  r>  rc  r?  r@  pTpHpW	dilationTrb  rc  rf  rg  rh  ri  rj  rk  r`  c           
      J   	
  j                   }t        j                  dkD  xr dkD  xr dkD  fd       t        j                  dkD  xr dkD  xr dkD  fd       t        j                  dkD  xr dkD  xr dkD  fd       t        j                  |dv  fd       t        |      D ]:  |dk(  rdk(  rt        j                   j	                        dkD   fd       < |r/t        j                  k\  xr k\  xr k\  fd	       t        j                  d
z  k\  xr d
z  
k\  xr d
z  	k\  	
fd       t        j                  dk\  xr dk\  xr dk\  fd       y )Nr   c                      d d  d S )Nz5kernel size should be greater than zero, but got kT: z, kH: r]  r4   )r=  rb  r>  s   r8   re   z$pool3d_shape_check.<locals>.<lambda>}  s#    $fRDrd, r:   c                      d d  d S )Nz0stride should be greater than zero, but got dT: z, dH: r_  r4   )r?  rc  r@  s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  s     >rd&FSURVW r:   c                      d d  d S )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: ra  r4   )rb  rm  rc  s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  s$    #M)M)V r:   r\  c                  &      dj                    S )Nz/: Expected 4D or 5D tensor for input, but got: r9  )r  r   s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  s    7)J5;;-X r:   r.  c                  L      dj                    dj                         dS )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r   r   )r  r   r   s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  s3    ) --2[[M+EJJqM?:KM r:   c                  .    d d  d d d d dS )Nzinput image (T: r6  r  z ) smaller than kernel size (kT:  kH:  kW: ry   r4   )rg  rf  rh  r=  rb  r>  s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  s9    "5'gYd6( C$$&4uRDbT< r:   r   c                  ,    d d d  d d d S )NzHpad should be smaller than or equal to half of kernel size, but got kT: rv  ru  z padT: z padW: z padH: r4   )r=  rb  r>  rk  rj  rl  s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  s6    $eB4uRDt72$gbTK r:   r3   c                  :    d d d  d d d d d dS rh  r4   )rg  rf  rh  re  rj  ri  rk  s   r8   re   z$pool3d_shape_check.<locals>.<lambda>  sI    !'!E7!G9AfX F((/y%'!F8 L'( r:   )r   rV   rg   r   r   )r   re  rb  r=  r>  rc  r?  r@  rj  rk  rl  rm  rb  rc  rf  rg  rh  ri  rj  rk  r  r`  r   r   s   `````````````````````  @r8   ra  ra  a  s   0 ::D	LL
Q$26$b1f	
 
LL
Q$26$b1f	
 
LLA9)a-9IM	
 
LLX
 4[ 
19aJJqMA	
	
 RK:GrM:fl 	
 
LL
Q"6a26"q&B,	
 	
 
LL
3v{3w!|	
 	
r:   c                 j   | j                   }t        | |||||||	|
||||||||||||       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr	  r0   r   r3   r   ra  r   )r   r  r   re  rb  r=  r>  rc  r?  r@  rj  rk  rl  rm  rb  rc  rf  rg  rh  ri  rj  rk  r  r   s                           r8   max_pool3d_backward_shape_checkr{    s    2 ::D








+0 ;dQh8;dQh6;dQh8;dQh77D$(G47D$(E27D$(G47D$(F3r:   c                     | j                   }t        | ||||||||	|
|ddd|||||||d       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       t        |||dz
  |       y )Nr3   Tr	  r0   r   rz  )r   r  re  rb  r=  r>  rc  r?  r@  rj  rk  rl  rf  rg  rh  ri  rj  rk  r  r   s                       r8   rs  rs    s    * ::D








			-2 ;dQh8;dQh6;dQh8;dQh7r:   c                    d } |d|      \  }}t        j                  t        |      dv d        t        |      dk(  r||}
}	n |d|      \  }	}
 |d|      \  }} |d|      \  }}| j                  d	      }| j                  d
      }| j                  d      }t	        j
                  |       }|t         j                  k(  r)t        j                  | j                         dk(  d        nR|t         j                  k(  r(t        j                  | j                         dv d        nt        j                  dd        t        ||||	||      }t        ||||
||      }t        | |||	|
||||||||||       |||fS )Nc                      t        j                  t        |      dv  fd       |d   }t        |      dk(  r|n|d   }||fS )Nr#  c                      d  dS )Nzmax_pool2d: r%  r4   r&  s   r8   re   zEmax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>=  r'  r:   r   r3   r(  r)  s   `   r8   r,  z3max_pool2d_checks_and_compute_shape.<locals>.unpack:  r-  r:   r  r.  c                       y)NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr4   r4   r:   r8   re   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>G  rm   r:   r   r   r  r  r5  r  r   r	  c                       y)NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr4   r4   r:   r8   re   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>X  rm   r:   r  c                       y)Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr4   r4   r:   r8   re   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>]  rm   r:   Fc                       y)NzAUnsupported memory format. Supports only ChannelsLast, Contiguousr4   r4   r:   r8   re   z5max_pool2d_checks_and_compute_shape.<locals>.<lambda>b  rm   r:   )rV   rg   r   r   rO   r   r  r   r   r9  r:  )r   r  r   r  r  r  r,  r=  r>  r?  r@  rA  rB  rb  rc  r  rC  rD  r   r	  r
  s                        r8   r  r  1  s    M;/FB	LLFy a 6{aRB&)B	7+JD$!*h7Iy**R.K**R.KBJ//6M+++IIK1c	
 
%11	1IIK6!O	

 	W	

 (Rr9iXL&z2tRIVK



$ k11r:   c                 |    t        |||||      \  }t        j                  j                   j                  k(   fd       |j                  fd}	 |	         |	|       t        j                        }
t        j                  j                  j                  j                  |
      S )Nc                  <    dj                    d j                    S )NzExpected dtype z  for `gradOutput` but got dtype r   r  s   r8   re   z7meta_max_pool2d_with_indices_backward.<locals>.<lambda>  s     /$**-MkN_N_M`a r:   c                 l    t        | dz
         t        | dz
         t        | dz
         y )Nr0   r   r3   )r   )r  rI  r   r	  r
  s    r8   _check_dim_sizez>meta_max_pool2d_with_indices_backward.<locals>._check_dim_size  s9    q$q,7q$q,7q$q+6r:   r~  )
r  rV   rg   r^   r   rO   r   r   r   r   )r  r   r  r   r  r  r  r   r  r  r   rI  r   r	  r
  s   ``         @@@@r8   %meta_max_pool2d_with_indices_backwardr  }  s     	,k67Hi		
 
LL

k'''a
 L99D7
 K G//5M;;

jj{{#	 r:   c                    t        | |||||      \  }}}| j                         dk(  r| j                  d      nd}	t        j                  |       }
| j                         dk(  r|||g}n|	|||g}t        j                  || j                  | j                  |
      t        j                  |t
        j                  | j                  |
      fS r  )
r  r   r   rO   r   rV   r   r^   r   r   r  s               r8   meta_max_pool2d_with_indicesr    s     	,{FGXy		
  %yy{a/UZZ^QF//6Myy{a\;7\;?++<<'		
 	++<<'		
 r:   c                 t   
 t        j                   j                  dv  fd        j                  }t        |dz
  |      D ].  
t        j                   j	                  
      dkD  
 fd       0 t        j                  t              dk(  d        t        j                  t        |      dk(  d         j	                  d	      } j	                  d
       j	                  d      |dk(  r j	                  d      }nd}t        j                   j                  j                  k(  d        t        j                  j                  dk(  fd       j	                  d      }j	                  d      }j	                  d      
t        j                  ||k\  d        t        j                  ||k(  d        t        j                  
dk(  
fd       t        j                  |d   d   z   dz
  k  fd       t        j                  |d   d   z   dz
  k  fd        j                         dk(  r|||d   |d   g}	n||d   |d   g}	t        j                  |	 j                   j                        t        j                  |	t         j                   j                        fS )Nr  c                  "    d j                    S )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r  r   s   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    LTYYKX r:   r0   r   c                  2    dj                          d  dS )Nz_fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimensions, but got r  z emptyr   )r   r   s   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  s'     77;yy{mCSTUSVV\^ r:   r   c                       y)NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr4   r4   r:   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  rm   r:   c                       y)NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr4   r4   r:   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  rm   r:   r5  r  r   r	  r3   c                       y)Nz6Expect _random_samples to have the same dtype as inputr4   r4   r:   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  rm   r:   c                  "    d j                    S )Nz1Expect _random samples to have 3 dimensions got, r  )random_sampless   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    CNDWDWCXY r:   c                       y)Nz=Expect _random_samples.size(0) no less then input batch size.r4   r4   r:   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  rm   r:   c                       y)Nz<Expect _random_samples.size(1) equals to input channel size.r4   r4   r:   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  rm   r:   c                      d  dS )Nz/Expect _random_samples.size(2) equals to 2 got .r4   )r   s   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    #RSTRUUV!W r:   c                      dd    d  S )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r4   )input_heightr  s   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    7A7GGno{n|} r:   c                      dd    d  S )Nz$fractional_max_pool2d: kernel width r3   z& is too large relative to input width r4   )input_widthr  s   r8   re   z,meta_fractional_max_pool2d.<locals>.<lambda>  s    6{1~6FFlmxlyz r:   r  )rV   rg   r   r   r   r   r^   r   r   r   r   )r   r  r{  r  r   input_channelsinput_batchr  cr   r   r  r  s   `` `      @@@r8   meta_fractional_max_pool2dr    ss   	LL		VX 99D4!8T" 
IIaL1^	

 
LLKA	2
 
LLKA	2 YYr]N99R=L))B-Kqyiil	LL

n***H 
LLq Y
 	AAAAAA	LL	[O 
LL	^N 
LLaWX	LLAQ'!+|;} 
LLAQ'!+{:z
 xxzQ^[^[^LAA? 	**;;	

 	++;;	
 r:   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | xs t        |      dv d        |s|n|d   }	|s|nt        |      dk(  r|	n|d   }
|s|nt        |      dk(  r|	n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  | j                  d	v d
        | j                  dk(  r| j	                  d      nd}| j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        ||||	||      }t        ||||
||      }t        ||||||      }t        | |||||	|
|||||||||||||d       | j                  dk(  xr& t        j                  |       t         j                  k(  }| j                  dk(  r||||f}n|||||f}| j                  |      }| j                  |t         j                        }|r@|j                  t         j                        }|j                  t         j                        }||fS )NrT  c                       yNzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr4   r4   r:   r8   re   z.meta_max_pool3d_with_indices.<locals>.<lambda>1  rm   r:   r   r3   r   c                       yNzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr4   r4   r:   r8   re   z.meta_max_pool3d_with_indices.<locals>.<lambda>9  rm   r:   c                       yNzImax_pool3d: padding must either be a single int, or a tuple of three intsr4   r4   r:   r8   re   z.meta_max_pool3d_with_indices.<locals>.<lambda>A  rm   r:   c                       yNzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr4   r4   r:   r8   re   z.meta_max_pool3d_with_indices.<locals>.<lambda>I  rm   r:   r\  c                       yr^  r4   r4   r:   r8   re   z.meta_max_pool3d_with_indices.<locals>.<lambda>Q  rm   r:   r.  r  r5  r  r   zmax_pool3d_with_indices()r	  r   r   )rV   rg   r   r   r   r9  ra  rO   r   r  r   r   r'  )r   r  r   r  r  r  rb  r=  r>  rc  r?  r@  rj  rk  rl  rm  rb  rc  r  re  rf  rg  rh  ri  rj  rk  r  r   r   r   s                                 r8   meta_max_pool3d_with_indicesr  %  s    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
  %zzQUZZ^AFjjnGJJrNEjjnGZZ^F BIyIE"7BB	9MG!&"b"iKF








#+4 	

aXE77>%BXBXX  zzQeWf5	WeWf=	
//)
$Cooiu{{o;Gff5#9#9f:**5+A+A*B<r:   c                    t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }	t        |      dk(  r|n|d   }
t        j                  | xs t        |      dv d        |s|n|d   }|s|	nt        |      dk(  r|n|d   }|s|
nt        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  t        |      dv d        |d   }t        |      dk(  r|n|d   }t        |      dk(  r|n|d   }t        j                  |j                  d	v d
        |j	                  d      }|j	                  d      }|j	                  d      }|j	                  d      }| j	                  d      }| j	                  d      }| j	                  d      }t        || ||||	|
|||||||||||||||d       |j                  dk(  xr& t        j                  |      t         j                  k(  }|j                  |j                        }|r |j                  t         j                        }|S )NrT  c                       yr  r4   r4   r:   r8   re   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rm   r:   r   r3   r   c                       yr  r4   r4   r:   r8   re   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rm   r:   r\  c                       yr^  r4   r4   r:   r8   re   z7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  rm   r:   r  r5  r  r   z"max_pool3d_with_indices_backward()r.  r   )rV   rg   r   r   r   r{  rO   r   r  r   r   r'  )r  r   r  r   r  r  r  r   rb  r=  r>  rc  r?  r@  rj  rk  rl  rm  rb  rc  re  rf  rg  rh  ri  rj  rk  r  r  s                                r8   %meta_max_pool3d_with_indices_backwardr    s    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
 jjnGJJrNEjjnGZZ^FR Er"Gb!F#








,/8 	

aXE77>%BXBXX  -J]]1G1G]H
r:   gridc                 z    t        j                   j                  j                  k(   fd       t        j                   j                  t         j                  k(  xr j                  t         j                  k(   fd       t        j                   j
                  d   j
                  d   k(   fd       t        j                  j
                  d    j                  dz
  k(   fd       t        d j                        D ],  t        j                   j
                     dkD   fd       . y )	Nc                  <    dj                    d j                    S )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r  r  r   s   r8   re   z+check_grid_sampler_common.<locals>.<lambda>  s'    \\N"24;;-A r:   c                  <    dj                    d j                    S )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )r   r  s   r8   re   z+check_grid_sampler_common.<locals>.<lambda>  s&    nT[[MC r:   r   c                  <    dj                    d j                    S )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes r9  r  s   r8   re   z+check_grid_sampler_common.<locals>.<lambda>  s'      %},A$**O r:   r   r   c                  B    dj                   dz
   d j                   S )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   r   r  s   r8   re   z+check_grid_sampler_common.<locals>.<lambda>  s,    9%**q.9I J226**? r:   c                  *    dj                    d  dS )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  r9  r  s   r8   re   z+check_grid_sampler_common.<locals>.<lambda>  r  r:   )rV   rg   r   r   r  r   r   r   )r   r  r   s   ``@r8   check_grid_sampler_commonr    s    	LL#	
 
LL%F$++*F	
 
LLA$**Q-'	
 
LL

2%**q.(	
 1ejj! 
KKNQ	

r:   c                       e Zd ZdZdZdZy)GridSamplerInterpolationr   r3   r   N)r{   
__module____qualname__BILINEARNEARESTBICUBICr4   r:   r8   r  r    s    HGGr:   r  interpolation_modec                     t        j                   j                  dk(  xr  j                  j                  k(   fd       t        j                   j                  dk(  xr |t        j                  j
                  k(   d        y )Nr.  c                  <    dj                    d j                    S )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r  r9  r  s   r8   re   z'check_grid_sampler_3d.<locals>.<lambda>  s&    449KK=#DJJ<1 r:   c                       y)Nz<grid_sampler(): bicubic interpolation only supports 4D inputr4   r4   r:   r8   re   z'check_grid_sampler_3d.<locals>.<lambda>&  rm   r:   )rV   rg   r   r  r  r+  )r   r  r  s   `` r8   check_grid_sampler_3dr    sp    	LL

a3EJJ$))3	
 
LLJJ!O M"&>&F&F&L&LL	
 	Or:   c                     |d   }|r&t        j                  |t         j                        }nd }t        j                  |t         j                        }	||	fS Nr   r   )rV   r  r   r   
r  r   r  r  padding_modealign_cornersr  input_requires_gradr  	grad_grids
             r8   grid_sampler_2d_backward_metar  *  sQ     &a.%%e5;R;RS

  U5L5LMI	""r:   c                     t        | |       t        | ||       | j                  d   }| j                  d   }|j                  d   }|j                  d   }|j                  d   }	| j                  |||||	f      S )Nr   r3   r   r0   )r  r  r   r   )
r   r  r  r  r  r  Cout_Dout_Hout_Ws
             r8   grid_sampler_3dr  =  sv     eT*%'9:AAAAJJqMEJJqMEJJqME??Aq%677r:   r  c                     t        ||       t        |||       |d   }|r&t        j                  |t        j                        }nd }t        j
                  |t        j                        }	||	fS r  )r  r  rV   r  r  r   r  s
             r8   grid_sampler_3d_backwardr  P  sm     eT*%'9:%a.%%!?!?

 
  U5S5STIy  r:   c                     |j                  d      }|st        j                  |      }||d<   t        j                  | g|i |S )Nr^   )r]   rO   	get_dtyperV   r   )r   r  rQ   r  r^   s        r8   fullr  h  sC    JJwE
+F7O;;t-d-f--r:   c                 N   |t         j                  k(  rt        j                  |d u d        t        j                  d|| j                  n|||| j
                  n||      }| j                  r>|j                  | j                         | j                         | j                                n/|j                  | j                         | j                         d       |j                  d       |S t        j                  j                  | |||||      }|j!                  d       |S )Nc                       y)Nz9memory format option is only supported by strided tensorsr4   r4   r:   r8   re   zzeros_like.<locals>.<lambda>  rm   r:   r   r
  Tr  )rV   
sparse_coorg   r   r^   r   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimr   _coalesced_r-   r   r)  fill_)r   r^   r   r   r   r   r  s          r8   r  r  s  s     !!!T!O	

 kk %$**5"(.4;;f!
 >>((		T__.0@ ((dhhj!D

//
!
!# " C IIaLJr:   r   c                    |t        j                         }|t        j                         }|t         j                  }t        j                  | ||||      S r  rV   r   get_default_devicer  r   r   r^   r   r   r   r   s         r8   	meta_onesr    T     }'')~))+~;;E&J r:   c                    |t        j                         }|t        j                         }|t         j                  }t        j                  | ||||      S r  r  r  s         r8   
meta_zerosr    r  r:   c                 ,    t        j                  |       S rD   rO   clone_preserve_strides)r   r*  r   r   s       r8   meta_select_scatterr        ''--r:   c                 ,    t        j                  |       S rD   r  )r   r*  r   r}   r|   steps         r8   meta_slice_scatterr    r  r:   dim_post_exprwrap_scalarc           	          |dk  r|st        d| d      d}| }|dz
  }| |k  s| |kD  rt        d|  d| d| d      | dk  r| |z  } | S )	Nr   zdim_post_expr=z <= 0 but wrap_scalar is Falser3   zdim z out of bounds (rx   ry   r  )r   r  r  r   r  s        r8   r   r     s      /MN  .C
!
C
SyC#ItC5(8RuAFGG
Qw}Jr:   c                 J    | j                         dk(  rdS | j                  |   S r  r  )r  r   s     r8   ensure_nonempty_sizer    s!    11.!''#,.r:   c                 :    t         j                         d      }t        j                         d      }t        j                  ||k(  d        t	        |      D ];  k7  s	t        j                  t              t               k   fd       = y )Nr3   c                       y)NzDIndex tensor must have the same number of dimensions as input tensorr4   r4   r:   r8   re   z$gather_shape_check.<locals>.<lambda>  rm   r:   c                  N    d dj                    dj                    d  z   S )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension r9  )r   r   r   r   s   r8   re   z$gather_shape_check.<locals>.<lambda>  s7    ;A3>Nu{{m\/

|;QRUQVWX r:   )r  r   rV   rg   r   r  )r   r   r   	self_dims
index_dimsr   s   ```  @r8   gather_shape_checkr     s    DHHJ"IUYY[!$J	LLZV 9 8LL$UA.2FtQ2OOXr:   c                 p   ddl m} t        || j                               } |j	                         dk(        }|s`t        j                  j                  t
        j                  k(  xs j                  t
        j                  k(  fd       t        | |       | j                  j                        S )Nr   r  c                  "    d j                    S )Nz8gather(): Expected dtype int32/int64 for index, but got r   r   s   r8   re   zmeta_gather.<locals>.<lambda>  s    Nu{{m\ r:   )r$  r  r   r   r   rV   rg   r^   r   r   r   r   r   )r   r   r   sparse_gradr  wrapped_dimis_index_emptys     `    r8   meta_gatherr    s    D dhhj1K#EKKMQ$67NKK5::%A		)A\	
 	4e4>>%++&&r:   c                     |r6| dk(  ry| dk(  ry| dk(  ry| dk(  ry| d	k(  ry
t        j                  dd        y | dk(  ry| dk(  ryt        j                  dd        y )Nr  
REDUCE_ADDr  REDUCE_MULTIPLYmeanREDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                       y)Nz=reduce argument must be either sum, prod, mean, amax or amin.r4   r4   r:   r8   re   z#get_operator_enum.<locals>.<lambda>  rm   r:   addmultiplyc                       y)Nz/reduce argument must be either add or multiply.r4   r4   r:   r8   re   z#get_operator_enum.<locals>.<lambda>"  rm   r:   rc  )reduce_use_new_optionss     r8   get_operator_enumr    s{    e$ ##S	
 	e
"$UUVr:   c                 P    ddl m}  ||j                         dk7        rSt        j                  |j
                  t        j                  k(  xs |j
                  t        j                  k(   fd       |1t        j                  |j
                  |j
                  k(   fd       y y )Nr   )rS  c                        dS )Nz((): Expected dtype int32/int64 for indexr4   method_names   r8   re   z,scatter_gather_dtype_check.<locals>.<lambda>-  s    {m#KL r:   c                        dS )Nz0(): Expected self.dtype to be equal to src.dtyper4   r  s   r8   re   z,scatter_gather_dtype_check.<locals>.<lambda>3  s    {m#ST r:   )r$  rS  r   rV   rg   r^   r   r   )r  r   r   src_optrS  s   `    r8   scatter_gather_dtype_checkr  '  sv    CU[[]a'(KK5::%A		)AL	

 JJ'--'T	
 r:   c                     t        | d      S r2   )r  r   s    r8   ensure_nonempty_dimr  7  s    sA;r:   c                     ddl m}  |j                         dk(        ry t        j                  t         j                               t        j                               k(  d        t         j                               }t        |      D ]>  }|k(  r	t        |      }t         |      }t        j                  ||k   fd       @ t        j                  t         j                               t        j                               k(  d        t        |      D ]7  }t        |      }t        |      }	t        j                  ||	k  fd       9 y y )Nr   r  c                       y)NzCIndex tensor must have the same number of dimensions as self tensorr4   r4   r:   r8   re   z%scatter_shape_check.<locals>.<lambda>C  rm   r:   c                  H    dj                    dj                    d  z   S )NExpected index r  r  r9  )r   r   r   s   r8   re   z%scatter_shape_check.<locals>.<lambda>Q  s,    oekk]2Mdjj\Z&se,- r:   c                       y)NzBIndex tensor must have the same number of dimensions as src tensorr4   r4   r:   r8   re   z%scatter_shape_check.<locals>.<lambda>Y  rm   r:   c                  <    d j                    dj                    S )Nr#  z to be no larger than src r9  )r   r  s   r8   re   z%scatter_shape_check.<locals>.<lambda>`  s!    /%++6PQXQ^Q^P_` r:   )	r$  r  r   rV   rg   r  r   r   r  )
r   r   r   r  r  r  r   index_d_sizeself_d_size
src_d_sizes
   ````      r8   scatter_shape_checkr)  <  s-   Dekkmq()	LLDHHJ'+>uyy{+KKU
 $DHHJ/I 9 	
8+E15*43K'-	
	
 
+/B7;;=/QQX	
 y! 	A/q9L-gq9JLL
*`	 r:   c                     t        || j                               }t        d| ||       t        | |||       |t	        ||       y y )Nscatter)r   r   r  r)  r  )r   r   r   r*  r  r  r  s          r8   scatter_meta_implr,  e  sE     dhhj1Ky$s;k5#6'?3 r:   c                 V    t        | |||d       | j                  | j                        S Nr  r,  r   r   r   r   r   r*  s       r8   meta_scatter_addr1  n  s%    dCU3>>$**%%r:   c                 $    t        | |||d       | S r.  r,  r0  s       r8   meta_scatter_add_r4  t  s    dCU3Kr:   c                     t        |t        j                        r|nd }t        | ||||       | j	                  | j
                        S rD   )rp   rV   r   r,  r   r   r   r   r   src_or_valuerk  r*  s         r8   meta_scatterr8  z  s;     %\5<<@,dCdCV4>>$**%%r:   c                 `    t        |t        j                        r|nd }t        | ||||       | S rD   )rp   rV   r   r,  r6  s         r8   meta_scatter_r:    s-     %\5<<@,dCdCV4Kr:   queryr   r+  	dropout_p	is_causalreturn_debug_maskr}  c           	      .   | j                  d      }| j                  d      }| j                  d      }	| j                  d      }
|j                  d      }t        j                  |       }t        j                  |||	ft        j                  | j
                        }|ra|
dkD  rdnd}t        j                  |	|z        }|dk  rd}n|dk  rd}t        j                  |||	|f| j                  | j
                        }n,t        j                  d| j                  | j
                        }t        j                  j                  rt        j                  j                         st        |       d	k(  rMt        j                  d
t        j                  d      }t        j                  d
t        j                  d      }nLt        j                  dt        j                  d      }t        j                  d
t        j                  d      }||d d |	||||f	S )Nr   r3   r   r0   r  @         r   r4   r   )r   rV   r   r   rZ   r   r  ceilr^   r  r  r   r~  r   r   r8  )r;  r   r+  r<  r=  r>  r}  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_k	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskseedoffsets                      r8   (meta__scaled_dot_product_flash_attentionrO    s    AJ

1IAzz!}H!  'I	Y 23kk||I %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 }}UZZ446+e:LPU:U{{2UZZ?Ruzz&A{{Aell6BRu||FC 	
 
r:   	q_descale	k_descale	v_descalec
           	          | j                   t        j                  k(  r| j                  t        j                        } t        | ||||||	      S rD   )r^   rV   rc  r'  rb  rO  )
r;  r   r+  rP  rQ  rR  r<  r=  r>  r}  s
             r8   2meta__scaled_dot_product_flash_attention_quantizedrT    sJ     {{e)))(3 r:   	res_shape.c                     t         j                        |k(  rt        j                         }|S t	        g d fdd      }|D cg c]  }||   	 }}t        t        |            D cg c]  }|j                  |       }}t        j                  | j                   j                        j                  |      }|S c c}w c c}w )N)r   r3   r   r0   c                 *    j                         |    S rD   r  )idxr;  s    r8   re   z,alloc_with_matching_layout.<locals>.<lambda>  s    %,,.*= r:   Tr   r  )rf   r   rV   r   sortedr   r   r   r   r^   r   r   )r;  rU  r  	dim_orderrX  permuted_shaper   final_permutes   `       r8   alloc_with_matching_layoutr]    s     U[[Y&u% J =t
	 5>>S)C.>>5:3y>5JK+KKkk%++ell

'-
  	 J ?Ks   C,C	attn_biascompute_log_sumexpc	           	         | j                  d      }	| j                  d      }
| j                  d      }|j                  d      }|j                  d      }|	|
||f}t        | |      }t        j                  |	|
|dft        j                  | j
                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||d d ||||d f	S Nr   r3   r   r   r  r4   r   r   r]  rV   r   rZ   r   r   )r;  r   r+  r^  r_  r<  r=  r>  r}  r  r*  S_QS_KVD_VrU  r  
logsum_exprM  rN  s                      r8   (meta__scaled_dot_product_cudnn_attentionrg    s     	

1A

1A
**Q-C88A;D
**R.CAsC I
$UI
6C	
AsAkk||J ;;rF;D[[5::f=F 	
 
r:   c           	         | j                  d      }| j                  d      }	| j                  d      }
|j                  d      }|j                  d      }||	|
|f}t        | |      }t        j                  ||	|
ft        j                  | j
                        }t        j                  dt        j                  d      }t        j                  dt        j                  d      }||d d |
|||d f	S ra  rb  )r;  r   r+  r^  r<  r=  r>  r}  r  H_Qrc  rd  re  rU  r  rf  rM  rN  s                     r8   5meta__scaled_dot_product_fused_attention_overrideablerj  1  s     	

1A
**Q-C
**Q-C88A;D
**R.CCc"I
$UI
6C	
Ckk||J ;;rF;D[[5::f=F 	
 
r:   r  rI  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS rD   r  )r  r;  r   r+  r   rI  rk  rl  rm  rn  r<  r=  ro  rp  r}  grad_qgrad_kr#  s                     r8   'meta__scaled_dot_product_flash_backwardrt  \  sA    , e$Fc"Fe$F66!!r:   	attn_maskc                     | j                  d      }| j                  d      }| j                  d      }	t        j                  |       }
t        j                  ||	|ft        j                  | j
                        j                  dd      }|
|fS )Nr   r3   r   r  )r   rV   r   r   rZ   r   r   )r;  r   r+  r<  r=  ru  r}  r   rD  rE  rH  rI  s               r8   0meta__scaled_dot_product_flash_attention_for_cpurw  x  s     AJ

1IA  'I	

 kk|| i1o  	 r:   c
                 n   t        j                  |j                         d|j                  |j                        }
t        j                  |j                         d|j                  |j                        }t        j                  |j                         d|j                  |j                        }|
||fS )Nr   r   r3   r0   r  )rV   empty_permutedr   r^   r   )r  r;  r   r+  r   rI  r<  r=  ru  r}  rr  rs  r#  s                r8   9meta__scaled_dot_product_flash_attention_for_cpu_backwardr{    s    & !!

kk||	F !!
iizz	F !!

kk||	F 66!!r:   dropout_maskc                      d } |       \   ||      \  }	}
 ||      \  }}
j                   \  |	j                   \  }
}}
 fd}fd}dk\  s
|k  rdk\  r |       S  |       S )Nc                 l   | j                         dk(  r| j                  d      dfS | j                         dkD  rxd}t        | j                         dz
        D ]  }|| j                  |   z  } | j	                  || j                  d      | j                  d      | j                  d            dfS | d	fS )
Nr0   r   Tr	  r3   r5  r  r   F)r   r  r   r   viewr   )r=   r   r   s      r8   	ensure_4dzBmeta__scaled_dot_product_attention_math_for_mps.<locals>.ensure_4d  s    557a<;;q>4''UUWq[J1557Q;' )aggaj(
)66*affRj!&&*affRjI4OOe8Or:   c                  X   j                  j                        } 	r| j                        } j                  f      }	raj                         dk(  r|j	                  d      }| |fS t        j                  d d       |j                  dd z   }|j                  |      }| |fS )Nr0   r   r5  r3   r	  )r   r   view_asr   squeezer   r  )
r   attnr   r   max_seq_lengthnum_headq_q_sizer;  
unsqueezeds
      r8   sdpa_vector_fast_mpszMmeta__scaled_dot_product_attention_math_for_mps.<locals>.sdpa_vector_fast_mps  s    ll288$++e$C||Z6>JKyy{a||A Dy U[["-.Aa@yy'Dyr:   c                  r    d} j                  j                        }j                  | f      }||fS )Nr)  r  )blocksr   r,  r   	head_sizer  r  r  s      r8   sdpa_vector_2pass_mpszNmeta__scaled_dot_product_attention_math_for_mps.<locals>.sdpa_vector_2pass_mps  s>    ll288$||Z669$UVL  r:   i   i   r9  )r;  r   r+  ru  r<  r=  r|  r}  r  k_rR   v_k_sizer  r  r   r  r  r  r  r  r  s   `              @@@@@@@r8   /meta__scaled_dot_product_attention_math_for_mpsr    s    	 u%NB
cNEBeEB.0hh+J&)#%88 Av~q ! ! 	$FVO$8N$&&#%%r:   c                 @   | j                  dd      } |j                  dd      }|j                  dd      }| j                  d      }| j                  d      }	| j                  d      }
|j                  d      }t        j                  ||	|
|| j                  | j
                        }t        j                  j                  r&t        j                  j                         r	 |r|	nd}n|rt        j                  |	dz        dz  nd}t        j                  ||
|ft        j                  | j
                        }|j                  dd      }t        j                  dt        j                  d	      }t        j                  dt        j                  d	      }||||fS )
Nr3   r   r   r  r   r  r)  r4   r   )r   r   rV   r   r^   r   r  r  r   r~  r  rC  rZ   r   )r;  r   r+  r^  r_  r<  r=  r}  r  r  rD  Kvr  logsumexp_dimrf  rM  rN  s                    r8   ,meta__scaled_dot_product_efficient_attentionr    sD    OOAq!E
--1
COOAq!E

1A

1A

2I	BB
++aIrU\\
RC}}UZZ446	 0Q2D		!b&)B.!	
I}%kk||J --1
C ;;rF;D[[5::f=F
D&((r:   grad_input_maskc                    |j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }t        j                  ||||fd|j                  |j                        }t        j                  ||||fd|j                  |j                        }t        j                  ||||fd|j                  |j                        }d }|~|
d   ry|j                  d      }|dz  dk(  r|n
|dz   |dz  z
  }t        |j                               }||d<   t        j                  ||j                  |j                        }|d	d |f   }||||fS )
Nr   r3   r   r0   ry  r  r   r(  .)r   rV   rz  r^   r   r   r   )r  r;  r   r+  r^  r   rI  ro  rp  r<  r  r=  r}  r   rD  rm  rF  
head_dim_vrn  rr  rs  r#  	grad_biaslastDimlastDimAligned	new_sizess                             r8   +meta__scaled_dot_product_efficient_backwardr  '  s{   ( AJ

1IJJqMEzz!}HAJHHQKE!!	Yx0kk||	F !!	Yx0iizz	F !!	Yz2kk||	F I!3..$$+bLA$57R<'TV,;V)*	&	"KKY__Y5E5E
	 c8G8m,	669,,r:   c                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS rD   r  )r  r;  r   r+  r   rI  ro  rp  r^  rk  rl  rm  rn  r<  r=  r}  rr  rs  r#  s                      r8   'meta__scaled_dot_product_cudnn_backwardr  c  sA    . e$Fc"Fe$F66!!r:   window_size_leftwindow_size_right	seqused_kalibi_slopesc                    || j                  d      n|j                         dz
  }|| j                  d      n|}||j                  d      n|}| j                  d      }| j                  d      }t        j                  |       }|4t        j                  |||ft        j
                  | j                        }nC| j                  d      }t        j                  ||ft        j
                  | j                        }|	ra|dkD  rdnd}t        j                  ||z        }|dk  rd}n|dk  rd}t        j                  ||||f| j                  | j                        }n,t        j                  d| j                  | j                        }d	\  }}t        j                  j                  rkt        j                  j                         rMt        j                  d
t        j                  d      }t        j                  d
t        j                  d      }nLt        j                  dt        j                  d      }t        j                  d
t        j                  d      }|||||fS )Nr   r3   r  r   r  r@  rA  rB  NNr4   r   r   )r   r   rV   r   r   rZ   r   r  rC  r^   r  r  r   r~  r   r8  )r;  r   r+  rk  rl  rm  rn  r<  r=  r>  r}  r  r  r  r  r   rE  rG  rD  rF  rH  rI  total_qrJ  rK  rL  rM  rN  s                               r8   meta__flash_attention_forwardr    s   4 #,"3A9JQ9NJ*3*;A(1(9!u

2Izz"~H   'IKK$67++<<
	 **Q-KK ELL
	 %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 LD&}}UZZ446{{2UZZ?Ruzz&A{{Aell6BRu||FC r:   c                     | j                   t        j                  k(  r| j                  t        j                        } t        | |||||||||	|||||      S rD   )r^   rV   rc  r'  rb  r  )r;  r   r+  rk  rl  rm  rn  r<  r=  r>  rP  rQ  rR  r}  r  r  r  r  s                     r8   'meta__flash_attention_forward_quantizedr    sb    * {{e)))(( r:   c                     t        j                  |      }t        j                  |      }t        j                  |      }|||fS rD   r  )r  r;  r   r+  r   rI  rk  rl  rm  rn  r<  r=  ro  rp  r}  r  r  
grad_querygrad_key
grad_values                       r8   meta__flash_attention_backwardr    sA    0 !!%(J$H!!%(Jx++r:   cu_seqlens_qcu_seqlens_kmax_seqlen_qrK  custom_mask_typecausal_diagonalseqlen_kwindow_sizec                    | j                  d      }| j                  d      }|j                  d      }| j                  d      }|j                  d      }t        j                  ||||| j                  | j                        }||j                  d      dz
  n|}|}||t        d      |}||n|}|
rt        j                  |dz        dz  nd}t        j                  |||ft        j                  | j                        }t        j                  dt        j                  d	      }t        j                  dt        j                  d	      }||||||fS )
Nr   r3   r  r   r  z;max_seqlen_q must not be None when cu_seqlens_q is providedr)  r4   r   )
r   rV   r   r^   r   r   r  rC  rZ   r   )r;  r   r+  r;  r  r  r  rK  r<  r  r_  r}  r  r  r  r  r  r  rD  r  r  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr  rf  rM  rN  s                               r8   !meta__efficient_attention_forwardr    sE   , 	

1A

1AA

2I	BB
++aIrU\\
RC7C7O,++A.2VW M  +*6*B,4F		%*+b0A  	i7kk||J ;;rF;D[[5::f=F
D&*=?RRRr:   bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                    |rt        j                  |j                  d   |j                  d   k(  d        t        j                  |j                  d   |j                  d   k(  d        t        j                  g |j                  dd d|j                  d   |j                  d   |j                  |j
                        }|j                  d	d      }|j                  d	d      }|j                  d	d
      }n?t        j                  |      }t        j                  |      }t        j                  |      }|z|j                  d      }|dz  dk(  r|n
|dz   |dz  z
  }t        |j                               }||d<   t        j                  ||j                  |j
                        }|dd |f   }n!t        j                  d|j
                        }||||fS )Nr3   c                       y)Nz,seqlen must match for `shared_storage_dqdkdvr4   r4   r:   r8   re   z4meta__efficient_attention_backward.<locals>.<lambda>m  rm   r:   r0   c                       y)Nz3embedding dim must match for `shared_storage_dqdkdvr4   r4   r:   r8   re   z4meta__efficient_attention_backward.<locals>.<lambda>q  rm   r:   r   r  r   r  r5  r   r(  .r4   r  )
rV   rg   r   r   r^   r   r  r   r   r   )r  r;  r   r+  r;  r  r  r  rK  rI  r<  ro  rp  r  r  r}  r  r  chunkr  r  r  r  r  r  r  s                             r8   "meta__efficient_attention_backwardr  Q  s   2 KKNciil*B	
 	KKNciil*I	
 Eekk!BEEEKKOEU[[_E++<<

 \\"a(
<<A&\\"a(
%%e,
##C(%%e,
))B-$+bLA$57R<'TV,;V%	&	"KK	DKKP	c8G8m,	KK5<<8	xY66r:   scale_ascale_bscale_resultuse_fast_accumc                     d }t        j                   j                         dk(  xr j                         dk(   fd       t        j                   | j                        xr  |j                         fd       t	               dk(  st	               dk(  rad }	d }
d	 }t        j                   |	 j                               xs  |        fd
       t        j                   |
j                               xs  |      fd       t        j                   j                  d      dz  dk(   fd       t        j                  j                  d      dz  dk(  xr j                  d      dz  dk(  fd        j                  \  j                  d      j                  t         j                  k(  xr j                  t         j                  k(  xs< j                  t         j                  k(  xr j                  t         j                  k(  }j                         dk(  rfj                         dk(  rSt        j                  j                  t         j                  k(  xr j                  t         j                  k(  d        nb|rj                  t         j                  k(  rd}dz  nd}d}t        |      }t        |d      dz  }|t        |      z  |z  |t        |      z  |z  j                         k(  r_j                         k(  rLt        j                  j                         d        t        j                  j                         d        nt        j                  dfd       nht        j                  j                  t         j                  k(  xr j                  t         j                  k(  d        t        j                  j                         dk(  xr j                         dk(  fd       j                  d      k(  ruj                  d      dk(  raj                  d      dk(  rMj                  d      k(  r9t        j                  j                         xr j                         d        nNj                  d      k(  rUj                  d      j                  d      cxk(  rt        d      k(  r"n nj                  d      t        d      k(  rnj                  d      k(  rKj                  d      j                  d      cxk(  rt        d      k(  rn nj                  d      k(  rnj                  d      t        d      k(  rKj                  d      j                  d      cxk(  rt        d      k(  rn nj                  d      k(  rnt        j                  dfd       ||n j                  }t        j                   j                  d      j                  d      | j                        S )Nc                     | t         j                  t         j                  t         j                  t         j                  t         j
                  fv S rD   rV   rc  float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzfloat4_e2m1fn_x2r   s    r8   is_fp8_or_fp4_typez2_check_scaled_mm_sizes.<locals>.is_fp8_or_fp4_type  A    !!!!""
 
 	
r:   r   c                  L    dj                          d j                          S Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   rJ  r   s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  '    7
|CSTXT\T\T^S_` r:   c                  <    dj                    d j                    S Nz?Expected both inputs to be fp8 or fp4 types but got self.dtype=z and mat2.dtype=r   r  s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  &    QRVR\R\Q]]mnrnxnxmyz r:   r   r   c                 ,    | d   | d   kD  xr | d   dk(  S r  r4   r  s    r8   is_row_majorz,_check_scaled_mm_sizes.<locals>.is_row_major  "    !9vay(;VAY!^;r:   c                 &    | d   dk(  xr | d   dkD  S r  r4   r  s    r8   is_col_majorz,_check_scaled_mm_sizes.<locals>.is_col_major      !9>3fQi!m3r:   c                 V    | j                  d      dk(  xs | j                  d      dk(  S r  r   	tensor_2ds    r8   has_zero_dimz,_check_scaled_mm_sizes.<locals>.has_zero_dim  )    >>!$)CY^^A->!-CCr:   c                  *    d j                          S Nz#self must be row_major, got stride r  r   s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>      9$++-I r:   c                  *    d j                          S Nz#mat2 must be col_major, got stride r  rJ  s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  r  r:   r3   r(  r   c                  ,    d j                  d       S NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r3   r   r   s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>      XY]YbYbcdYeXfg r:   c                  "    d j                    S Nz?Expected both dimensions of mat2 to be divisible by 16 but got r9  r  s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>      UVZV`V`Uab r:   c                       y)NzNFor tensorwise scaling, both scale_a and scale_b must be float (fp32) tensors.r4   r4   r:   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  rm   r:   r)  rA  r	  c                       y)Nzscale_a must be contiguousr4   r4   r:   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  rm   r:   c                       y)Nzscale_b must be contiguousr4   r4   r:   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  rm   r:   Fc            	      Z    d  dj                          d dj                          d	S )NzTInvalid blockwise scaling configuration. For blockwise scaling, scale_a should have  elements, got z, scale_b should have r  r  )expected_a_sizeexpected_b_sizer  r  s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  sH    FFUEVVefmfsfsfuev w//>.?w}}N__`b r:   c                       y)NzKFor rowwise scaling, both scale_a and scale_b must be float (fp32) tensors.r4   r4   r:   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  rm   r:   c                  L    d j                         dj                         S )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   r  r  s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  s,    gY`YdYdYfXhh|nunynyn{m}~ r:   c                       y)Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r4   r4   r:   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>  rm   r:   c                     d d d dt         d       d	dt         d       dt        d       d dt         d       d	z   dt         d       d d	t        d       dt         d       d	z   dt         d       d d
j                  d       dj                  d       dj                  d       dj                  d       dz   S )N}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (, 1), scale_b should be (1, >). For (BlockWise1x128, BlockWise128x128), scale_a should be (rx   rA  ), scale_b should be (<). For (BlockWise1x128, BlockWise1x128), scale_a should be (z>). For (BlockWise128x128, BlockWise1x128), scale_a should be (). Got scale_a.size()=(r   r3   ) and scale_b.size()=(ry   r9   r   )_krf  r  r  r  s   r8   re   z(_check_scaled_mm_sizes.<locals>.<lambda>7  sL   CCD#Eabcad eVVWUXXZ[cdfhk[lZmmpr 0S0A/B"XaQTEUDV WTTUSVVXYabdfiYjXkknp	p 0S0A/B"QC HVV^_`beVfUggijrsuwzj{i||AA 0S0A/B"QC H//6||A.?r',,q/AR S//6||A.?r',,q/ARRSU
U r:   r  )rV   rg   r   r^   r   r   r   r   float8_e8m0fnurc  r   r`  r9   rd  r   r   )r   rJ  r  r  r;  r  r=  r  r  r  r  r  is_blockwise_scalingblock_size_kblock_size_mnnum_k_blockspadded_num_k_blocks
_out_dtyper  r  r  rf  r  s   ````              @@@@@r8   _check_scaled_mm_sizesr    s   
 
LL
a+DHHJ!O` 
LL4::&I+=djj+Iz
 4F"k$&75&@	<	4	D 	'=<+=I	
 	'=<+=I	
 	IIaL2"g	
 	IIaL2"=tyy|b'8A'=b	
 

2IIaL !5!55 :MMU%9%99
 !4!44 9MMU%8%88 	 ==?aGMMOq$8LL.Q7==EMM3Qh " }} 3 33  "!V!M#B5L"*<";a"? M ::=PP  M ::=PP 
 ?2MMO6))+8 ))+8
  LL.Q7==EMM3Qe
 LL"9w{{}'9~ Q1$LLOq(LLOq(LLOq( ))+G0E0E0G^
 Q1$LLOw||AK(2s:KKLLOx3'77 Q1$LLOw||AK(2s:KKLLOq( Q8As#33LLOw||AK(2s:KKLLOq(  " (3J;;tyy|TYYq\DKKXXr:   c           
      &    t        | |||||||      S rD   )r  )r   rJ  r  r  r;  r  r=  r  s           r8   meta_scaled_mmr  J  s"     "dGWdL)^ r:   scale_recipe_ascale_recipe_b	swizzle_a	swizzle_bc           
         	 !"#$%& d }d }t        j                   j                         dk(  xr j                         dk(   fd       t        j                   | j                        xr  |j                         fd        j                  d     j                  d   j                  d   ! | j                        r |j                        rd}|z  |D cg c]  }t        |       }}|D cg c]  }t        |       }}rD cg c]  }t        |       c}nt        j                  g	r	D cg c]  }t        |       c}	nt        j                  g	t               dk(  st               d	k(  rd
 }d }d }t        j                   | j                               xs  |        fd       t        j                   |j                               xs  |      fd       t        j                   j                  d      dz  dk(   fd       t        j                  j                  d      dz  dk(  xr j                  d      dz  dk(  fd       dt        t
           dt        t
           fd}dt        t
           dt        t
           fd}dt        t
           dt        t
           fd}dt        t
           dt        t
           fd}dt        t
           dt        t
           fd}dt        t
           dt        t
           fd}dt        t
           dt        t
           fd}dt        t
           dt        t
           fd} |||      rt        j                  d   j                         dk(  xrZ d   j                         dk(  xrB d   j                  t         j                  k(  xr  d   j                  t         j                  k(  d        	n |||      rt        j                  d   j                  d    k(  xrr d   j                          k(  xrZ d   j                  t         j                  k(  xr8 d   j                         !k(  xr  d   j                  t         j                  k(   fd       n |||      rqd   j                  t         j                  k(  xr  d   j                  t         j                  k(  }d   %%j                  d    k(  xrk %j                  d   dz  k(  xrT %j                  d      dk(  xr> %j                  d       k(  xs( %j                  d   dk(  xr %j                  d      dk(  }d   &&j                  d   !k(  xrk &j                  d   dz  k(  xrT &j                  d      dk(  xr> &j                  d      !k(  xs( &j                  d   dk(  xr &j                  d      dk(  }t        j                  |xr |xr | !%&fd       ne |||      rd   j                  t         j                  k(  xr  d   j                  t         j                  k(  }t        dz  d       d   %%j                  d   k(  xrk %j                  d    dz  k(  xrT %j                  d      dk(  xr> %j                  d      k(  xs( %j                  d   dk(  xr %j                  d      dk(  }d   &&j                  d   !k(  xrk &j                  d   dz  k(  xrT &j                  d      dk(  xr> &j                  d      !k(  xs( &j                  d   dk(  xr &j                  d      dk(  }t        j                  |xr |xr | !%&fd!       n |||      rd   j                  t         j                  k(  xr  d   j                  t         j                  k(  }t        dz  d       d   %%j                  d    k(  xrk %j                  d   dz  k(  xrT %j                  d      dk(  xr> %j                  d       k(  xs( %j                  d   dk(  xr %j                  d      dk(  }d   &&j                  d   k(  xrk &j                  d   !dz  k(  xrT &j                  d      dk(  xr> &j                  d      k(  xs( &j                  d   dk(  xr &j                  d      dk(  }t        j                  |xr |xr | !%&fd"       nO |||      rt         j                  j                   rct#         j                  d   d#       j                  d   z  "t#         j                  d   d#       j                  d   z  #t        j                  $nt         j                  d   d      t        t#         j                  d   d#      d       z  "t        j                  d   d      t        t#         j                  d   d#      d       z  #t        j$                  $t        j                  d   j                         "k(  xrn d   j                  t         j&                  k(  xrL d   j                         #k(  xr4 d   j                  t         j&                  k(  xr d   $k(  xr 	d   $k(  "#$	fd$       n |||      rt         d      t        t#        d      d       z  "t        !d      t        t#        d      d       z  #t        j$                  $t        j                  d   j                         "k(  xrn d   j                  t         j(                  k(  xrL d   j                         #k(  xr4 d   j                  t         j(                  k(  xr d   $k(  xr 	d   $k(  "#$	fd%       n |||      rnt         d      t        t#        d      d       z  "t        !d      t        t#        d      d       z  #t        j$                  $t        j                  d   j                         "k(  xr d   j                  t         j(                  k(  xr d   j                         dk(  xr d   j                  t         j                  k(  xr d   j                         #k(  xrn d   j                  t         j(                  k(  xrL d   j                         dk(  xr4 d   j                  t         j                  k(  xr d   $k(  xr 	d   $k(  "#$	fd&       nt        j                  d' !fd(       ||n j                  }t        j*                   !| j,                  )      S c c}w c c}w c c}w c c}w )*Nc                     | t         j                  t         j                  t         j                  t         j                  t         j
                  fv S rD   r  r   s    r8   r  z5_check_scaled_mm_sizes_v2.<locals>.is_fp8_or_fp4_typeg  r  r:   c                 (    | t         j                  k(  S rD   )rV   r  r   s    r8   is_fp4_typez._check_scaled_mm_sizes_v2.<locals>.is_fp4_typep  s    ....r:   r   c                  L    dj                          d j                          S r  r   r  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>u  r  r:   c                  <    dj                    d j                    S r  r   r  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>y  r  r:   r   r3   r   r   c                 ,    | d   | d   kD  xr | d   dk(  S r  r4   r  s    r8   r  z/_check_scaled_mm_sizes_v2.<locals>.is_row_major  r  r:   c                 &    | d   dk(  xr | d   dkD  S r  r4   r  s    r8   r  z/_check_scaled_mm_sizes_v2.<locals>.is_col_major  r  r:   c                 V    | j                  d      dk(  xs | j                  d      dk(  S r  r   r  s    r8   r  z/_check_scaled_mm_sizes_v2.<locals>.has_zero_dim  r  r:   c                  *    d j                          S r  r  r   s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r  r:   c                  *    d j                          S r  r  r  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r  r:   r(  c                  ,    d j                  d       S r  r   r   s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r  r:   c                  "    d j                    S r  r9  r  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  r  r:   recipe_arecipe_bc                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   
TensorWiser   r!  s     r8   is_tensorwisez0_check_scaled_mm_sizes_v2.<locals>.is_tensorwise  sT    H" :MQ&:QK;#9#99: QK;#9#99	r:   c                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   RowWiser$  s     r8   
is_rowwisez-_check_scaled_mm_sizes_v2.<locals>.is_rowwise  sT    H" 7MQ&7QK;#6#667 QK;#6#66	r:   c                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   BlockWise1x32r$  s     r8   is_mxz(_check_scaled_mm_sizes_v2.<locals>.is_mx  sT    H" =MQ&=QK;#<#<<= QK;#<#<<	r:   c                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   BlockWise1x16r$  s     r8   is_nv_single_levelz5_check_scaled_mm_sizes_v2.<locals>.is_nv_single_level  sV     H" =MQ&=QK;#<#<<= QK;#<#<<	r:   c                     t        |       dk(  xrn t        |      dk(  xr^ | d   t        j                  k(  xrF | d   t        j                  k(  xr. |d   t        j                  k(  xr |d   t        j                  k(  S )Nr   r   r3   )r   r(   r-  r#  r$  s     r8   is_nvz(_check_scaled_mm_sizes_v2.<locals>.is_nv  s    H" :MQ&:QK;#<#<<: QK;#9#99: QK;#<#<<	:
 QK;#9#99r:   c                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   BlockWise1x128r$  s     r8   is_1x128_1x128z1_check_scaled_mm_sizes_v2.<locals>.is_1x128_1x128  sT    H" >MQ&>QK;#=#==> QK;#=#==	r:   c                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   r2  BlockWise128x128r$  s     r8   is_1x128_128x128z3_check_scaled_mm_sizes_v2.<locals>.is_1x128_128x128  sW    H" @MQ&@QK;#=#==@ QK;#?#??	r:   c                     t        |       dk(  xr> t        |      dk(  xr. | d   t        j                  k(  xr |d   t        j                  k(  S r  )r   r(   r5  r2  r$  s     r8   is_128x128_1x128z3_check_scaled_mm_sizes_v2.<locals>.is_128x128_1x128  sT    H" >MQ&>QK;#?#??> QK;#=#==	r:   c                       y)Nz\For Tensorwise scaling, both scale_a and scale_b must be single element float (fp32) tensorsr4   r4   r:   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  rm   r:   c            	          dj                   d    dd   j                          d j                   d    dd   j                          d	S )Nz'For Rowwise scaling, scale_a must have r   z elements (got: z), and scale_b must have r3   ry   )r   r   )rJ  r  r  r   s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>
  sa    =djjm_L\]def]g]m]m]o\p q//3zz!}o=MgVWjN^N^N`Maabd r:   rA  c                      d d dz   dj                    d dj                   d d dz   dj                    d dj                   dS )Nz>For 1x128 x 1x128 blockwise scaling, scale a must have shape [rx   rA  ] (got: ) and stride [1, )scale b must have shape [ry   r   r   )rs  r  r  sasbs   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>'  s~    001s"Q#XJhrxxjPabcaddlmomvmvlw x001s"Q#XJhrxxjPabcaddlmomvmvlwwxz r:   r	  c                      d ddz   dj                    d dj                   d d dz   dj                    d dj                   dS )Nz]For 128x128 x 1x128 blockwise scaling, L4 = {round_up(K / 128, 4)}, scale a must have shape [rx   rA  r<  r=  r>  ry   r?  rs  L4r  r  r@  rA  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>F  s~    002t2a3hZxzQbcebffnoqoxoxny z001s"Q#XJhrxxjPabcaddlmomvmvlwwxz r:   c                      d d dz   dj                    d dj                   d ddz   dj                    d dj                   dS )Nz]For 1x128 x 128x128 blockwise scaling, L4 = {round_up(K / 128, 4)}, scale a must have shape [rx   rA  r<  r=  r>  ry   r?  rC  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>e  s~    001s"Q#XJhrxxjPabcaddlmomvmvlw x002t2a3hZxzQbcebffnoqoxoxnyyz| r:   r)  c                      d  dd   j                          d dd   j                          dt        j                   dd   j                   dd   j                   d d	d    dd    d
S )Nz!for MX scaling scale_a must have  (got: r   ) and scale_b must have z). Scales must have types z (for self: 	, mat_b: z) Must have swizzle type  (got self: ry   )r   rV   r  r^   expected_scale_a_elemsexpected_scale_b_elemsexpected_swizzler  r  r  r  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s    78N7OwW^_`WaWgWgWiVj k--C,DGGTUJL\L\L^K_ `""'"6"6!7|GAJDTDTCUU^_fgh_i_o_o^p q..>-?|IVWL>Ybclmncobppqs r:   c                      d  dd   j                          d dd   j                          d dd    dd    dS )	Nz.for single-level NV scaling scale_a must have rG  r   rH  ). Must have swizzle type rJ  rI  ry   r  rK  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  sw    DE[D\\cdklmdndtdtdvcw x--C,DGGTUJL\L\L^K_ `$$4#5\)A,yYbcdYeXffgi r:   c                      d  dd   j                          d dd   j                          d dd    dd    dS )	Nz!for NV scaling scale_a must have rG  r   rH  rP  rJ  rI  ry   r  rK  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  sv    78N7OwW^_`WaWgWgWiVj k--C,DGGTUJL\L\L^K_ `$$4#5\)A,yYbcdYeXffgi r:   Fc                  b   d d d dt         d       d	dt         d       dt        d       d dt         d       d	z   dt         d       d d	d
   j                  d
       dd
   j                  d       dd
   j                  d
       dd
   j                  d       dz   S )Nr  r  r  rx   rA  r  r  r   r  r   r3   r  ry   r  )rs  r  r  r  r  s   r8   re   z+_check_scaled_mm_sizes_v2.<locals>.<lambda>  s   ??@cA]^_]` aRRSQTTVW_`acfWgVhhkm ,HQ,<+=RC@P?Q RPPQsRTU]^_adUeTffik	k ,HQ,<+=Rs C++21:??1+=*>bQRAS@T U++21:??1+=*>bQRAS@TTUWW r:   r  )rV   rg   r   r^   r   r(   r)   
NO_SWIZZLEr   r   r   r   r   r`  r?   r  r  r9   SWIZZLE_32_4_4r  rc  r   r   )'r   rJ  r  r  r  r  r;  r=  r  r  r  r  r  K_packed_multipliersir  r  r  r%  r(  r+  r.  r0  r3  r6  r8  types_ok
scale_a_ok
scale_b_okr  rs  rD  r  r  rL  rM  rN  r@  rA  s'   ``` `   ``                    @@@@@@@@@r8   _check_scaled_mm_sizes_v2rZ  Z  s$   
/ 
LL
a+DHHJ!O` 
LL4::&I+=djj+Iz 	

1A

1A

1A 4::;tzz#:	  0>?"k"o?N?0>?"k"o?N?/89[_9	 ""
	 /89[_9	 ""
	 4F"k$&75&@	<	4	D 	'=<+=I	
 	'=<+=I	
 	IIaL2"g	
 	IIaL2"=tyy|b'8A'=b	

	D$5 	kAR 		k!2 	d;>O 		D- 	k9J 		;'	373D		D- 	k9J 		T+%6 	${BS 		tK'8 	DDU 		tK'8 	DDU 	 8LL
  "a' 6AJ$$&!+6AJ$$56 AJ$$5v 7LL
  #q( 6AJ$$&!+6AJ$$56 AJ$$&!+6 AJ$$5
 NN;
 
  EMM1Wgaj6F6F%--6W  Bq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  Bq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  LL6Z6J nn= 
  EMM1Wgaj6F6F%--6W  !c'1%BBr! UHHQK18+UIIaLA%U YYq\R'SBHHQK1,<,R1QRAR	  Bq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  LL6Z6J  nn= 
  EMM1Wgaj6F6F%--6W  !c'1%BBq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  Br! UHHQK18+UIIaLA%U YYq\R'SBHHQK1,<,R1QRAR	  LL6Z6J  >>2}}  )1$**Q-)DtzzRS})T&)1$**Q-)DtzzRS})T&#.#9#9 )1$**Q-)ETZZ]B/I *& *2$**Q-)ETZZ]B/I *& $/#=#= LL
  "&<< 5AJ$$(<(<<5AJ$$&*@@5 AJ$$(<(<<5 aL$44	5
 aL$44   ?%-a%5!RRS8T%T"%-a%5!RRS8T%T"*99LL
  "&<< 5AJ$$(;(;;5AJ$$&*@@5 AJ$$(;(;;5 aL$44	5
 aL$44  >>2%-a%5!RRS8T%T"%-a%5!RRS8T%T"*99LL
  "&<< 	5AJ$$(;(;;	5AJ$$&!+	5 AJ$$5	5 AJ$$&*@@		5
 AJ$$(;(;;	5 AJ$$&!+	5 AJ$$5	5 aL$44	5 aL$44 $ LL
 (3J;;q!:dkkBBm	 @? : :s   0ww"wwr  contraction_dimsc                 .    t        | |||||||	|||      S )N)r;  r=  r  r  r  )rZ  )r   rJ  r  r  r  r  r  r  r;  r  r[  r  s               r8   meta_scaled_mm_v2r]    s2     %% r:   c                 Z    t        | ||||d       | j                  | j                        S NT)r  r/  r   r   r   r*  rk  ri  s         r8   meta_scatter_reduce_twora    s)     dCVTJ>>$**%%r:   c                 (    t        | ||||d       | S r_  r3  r`  s         r8   meta_scatter_reduce__tworc    s    dCVTJKr:   c                t    t        j                  d j                         cxk  xr dk  nc  fd        j                         dk(  r0t        j                  |t         j                   j
                        S t        j                   j                  d      |t         j                   j
                        S )Nr   r   c                  *    d j                          S )NzAThe probability distributions dimensions must be 1 or 2, but got r   r  s   r8   re   z"meta_multinomial.<locals>.<lambda>  s    STYT]T]T_S`a r:   r3   r  )rV   rg   r   r   r   r   r   )r   num_samplesreplacementr  s   `   r8   meta_multinomialrh    s|     
LL	EIIK1a yy{a{{;ejjNN;;

1{%**U\\ r:   c                 "    d}| D ]  }||z  }	 |S r2   r4   )vsra  vs      r8   multiply_integersrl    s$    	A 	QHr:   c                 L    t        j                  t              k(  fd       dz   t        j                  t               k(   fd       t        j                  t        d  dd  D              xr t        d D               fd        d d \  }}||gS )Nc                  &    d  dt               S )Nz%It is expected output_size equals to , but got size r  )num_spatial_dimsr{  s   r8   re   z'upsample_common_check.<locals>.<lambda>  s    78H7IY\]hYiXjk r:   r   c                  &    d  dt               S )Nz$It is expected input_size equals to ro  r  )expected_input_dimsrQ  s   r8   re   z'upsample_common_check.<locals>.<lambda>	  s    67J6K?[^_i[jZkl r:   c              3   &   K   | ]	  }|d kD    ywr   Nr4   rr   r  s     r8   rt   z(upsample_common_check.<locals>.<genexpr>  s     *aAE*   c              3   &   K   | ]	  }|d kD    ywrt  r4   ru  s     r8   rt   z(upsample_common_check.<locals>.<genexpr>  s     2NQ1q52Nrv  c                      d  d S )NzDInput and output sizes should be greater than 0, but got input size z and output size r4   )rQ  r{  s   r8   re   z'upsample_common_check.<locals>.<lambda>  s      \!2;-A r:   )rV   rg   r   r  )rQ  r{  rp  r  channelsrr  s   ```  @r8   upsample_common_checkrz    s    	LLK,,k +Q.	LLJ..l
 
LL*:ab>**Ns2N+2N/N	A ""1~FHH+{++r:   c                 4    t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      j                  t        j                               S )Nr   r3   c                  *    d j                          S )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r  s   r8   re   z$upsample_nearest1d.<locals>.<lambda>      PQVQ[Q[Q]P^_ r:   rp  r   
rV   rg   r   rl  r   rz  r   r'  rO   r   )r   r{  scalesfull_output_sizes   `   r8   upsample_nearest1dr         
LLA/

QR0@A_ -

kA ??+,//11%8 0  r:   c                     t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      }t        j                         } j                  \  }}}} j                  j                  dk(  r|dk  rt         j                  }|j                  |      }|S )	Nr   r3   c                  *    d j                          S Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r  s   r8   re   z$upsample_nearest2d.<locals>.<lambda>,  r}  r:   r   r~  r   r	  r   )rV   rg   r   rl  r   rz  r   rO   r   r   r   rz   r   
contiguous)	r   r{  scales_hscales_wr  r   r   rR   
n_channelss	   `        r8   upsample_nearest2dr  &  s     
LLA/

QR0@A_ -

kA __-.F //6M  ++Az1a||F"zA~//];FMr:   r{  rQ  r  r  c                 X    t        ||d      t        j                   j                  dk(   fd       t	        d      D ]2  t        j                   j                           k(   fd       4  j                  |      j                  t        j                               S )Nr   r~  r	  c                  "    d j                    S NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r  r  s   r8   re   z-upsample_nearest2d_backward.<locals>.<lambda>R      XYdYiYiXjk r:   c            
      D    d d     d dj                         S )NzCExpected grad_output to have the same shape as output; output.size() = z but got grad_output.size(r   r  r  r   s   r8   re   z-upsample_nearest2d_backward.<locals>.<lambda>W  s>      !s$'7':&;,QCtK4D4DQ4G3HJ r:   r   )
rz  rV   rg   r   r   r   r   r'  rO   r   )r  r{  rQ  r  r  r  r   s   `    @@r8   upsample_nearest2d_backwardr  @  s     -K! 
LLAk 1X 
Q#3A#66	

   ,//11+> 0  r:   c                 4    t        j                   j                         dk7  xs t         j	                         dd         fd       t         j	                         |d      } j                  |      j                  t        j                               S )Nr   r3   c                  *    d j                          S )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r  s   r8   re   z$upsample_nearest3d.<locals>.<lambda>i  r}  r:   r0   r~  r   r  )r   r{  scales_dr  r  r  s   `     r8   upsample_nearest3dr  c  r  r:   c                    t        j                  |       t        j                  | t         j                        }}||t        |t              st        dt        |             t        |t              st        dt        |             |j                  }|j                         }	t        ||      }t        ||      }|j                  ||	       |j                  ||	       t        ||       t        ||       ||fS ||fS )Nr   zvalues must be TensorLike, got z indices must be TensorLike, got )r  r  )rV   r   r   rp   r   r   rz   r   r   r!   r   r#   )
r   stabler   
descendingr   r   rk  r   r   
out_strides
             r8   	meta_sortr  s  s     D!5#3#3D#LqAg1&*- #B4<.!QRR':. #CDM?!STT GG	XXZ
"695#GY79j1Iz2F3G4wa4Kr:   c           	          t        j                   j                  dk(   fd       t        j                   j                  j                  k(   fd        j	                  d      t        j                  j                  dk(  fd       t        j                  j                         k(  fd       t        j                  j                  j                  k(  fd       t        j                  j                  dk(  fd        j	                  d	      z  z  t        j                  j                         k(   fd
       t        j                  t         fdfD              d        y )Nr   c                  "     j                    dS Nz != 2r  )input_gatess   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>      ;3C3C2DE0J r:   c                  :    j                    d j                    S NrC  r9  )hidden_gatesr  s   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  s     ;$$%T,*<*<)=> r:   r3   c                  "     j                    dS )Nz != 1r  )
input_biass   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  s    joo5Fe3L r:   c                  .    j                          d  S r  r  )
gates_sizer  s   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  s    z'')*$zl; r:   c                  :    j                    d j                    S r  r9  )hidden_biasr  s   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  s     z''([->->,?@ r:   c                  "     j                    dS r  r  )prev_hiddens   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  r  r:   r   c            
      `    j                          dj                  d       d d d  d
S )NrC  r   z * z // z (aka ry   )r   r   )expected_prev_hidden_numelfactorr  r  r  s   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  sB    ;$$&'tK,<,<Q,?+@J<tTZS[[ab|a}}~ r:   c              3   P   K   | ]  }|j                   j                   k(    y wrD   r  )rr   r=   r  s     r8   rt   z&rnn_cell_checkSizes.<locals>.<genexpr>  s(      
 HH***
s   #&c                       y)Nz%expected all inputs to be same devicer4   r4   r:   r8   re   z%rnn_cell_checkSizes.<locals>.<lambda>  rm   r:   )rV   rg   r   r   r   r   r  )r  r  r  r  r  r  r  r  s   ``````@@r8   rnn_cell_checkSizesr    s@    
LL!!Q&(JK	LL\///> !!!$JZ__)+LM*,;	
 	 1 11@	
 
LL!!Q&(JK!,!1!1!!4z!AV!K	LL99 
LL 
"J[I
 	
 	8r:   c                 
   t        | |||d|       t        j                  | t        j                        }t        j                  |t        j                        }t        j                  |t        j                        }|||fS )Nr	  r   )r  rV   r   r   )r  r  cxr  r  	workspacehycys           r8   _thnn_fused_lstm_cell_metar    sk     \:{ArR  E<S<STI			"E,C,C	DB			"E,C,C	DBIr:   c                 b   t        |      dk7  }|r t        |      }|d   }| j                  d   }nB|
r| j                  d   n| j                  d   }|
r| j                  d   n| j                  d   }d}|rdnd}|dk7  r|n|}|r|||z  g}n|
r||||z  gn||||z  g}| j                  |      }|	|z  ||g}|"t        j                  d| j
                        }n|j                  |      }|j                  |	|z  ||g      }|rdnd}| j                  |t        j                        }|||||fS )Nr   r3   r   r   r  r   )r   r   r   rV   r   r   r  )r   r9  weight_stride0
weight_bufhxr  r]  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalbatch_sizesdropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r   
cell_shaper  r  reserve_shapereserves                                r8   
_cudnn_rnnr    sS   & +&!+O%
 ^
++a.'2U[[^A
'2U[[^A
'QQN%NyH$h&?@	  X%>?j(^*CD 	
 __Y'F~-z;GJ	z[[5<<0\\*%	zN2JI	JB AAMoom5;;o?G2r7J..r:   c                     d}|D ])  }|j                         dkD  s||j                         z  }+ | j                  |f      }t        | |||||||d|||	|
|||      S rs  )r   r   r  )r   r9  r  r  r  r]  r  r  r  r  r  r  r  r  total_weight_elemsr  r  s                    r8   
miopen_rnnr    s    &  ,779q=!'')+, "4!67J

	! r:   c                 (   |r| j                   d   n| j                   d   }|r| j                   d   n| j                   d   }|
}|r|||gn|||g}| j                  |      }|"t        j                  d| j                        }n|j                  |j                         }|"t        j                  d| j                        }n|j                  |j                         }t        j                  d| j                  t        j
                        }||||fS )Nr3   r   r  r   )r   r   rV   r   r   r  )r   w0w1w2w3hx_cx_r   r  r]  r  r  
has_biasesr  r  r  r  r  output_chanelsr   r   r  r  r  s                           r8   mkldnn_rnn_layerr  +  s    & $/QEKKNJ#.QEKKNJ N  
Z0*n5 
 __Y'F
{[[5<<0]]399%
{[[5<<0]]399%Aell%++FI2r9$$r:   c                     | j                   dk(  r%t        j                  dk(  xs dk(  fd       y t        j                  | j                        dk7  fd       y )Nr   r   c                       d  S )Nz4: Expected reduction dim -1 or 0 for scalar but got r4   r   r  s   r8   re   z'zero_numel_check_dims.<locals>.<lambda>W  s    wiSTWSXY r:   c                       d  dS )Nz: Expected reduction dim z to have non-zero size.r4   r  s   r8   re   z'zero_numel_check_dims.<locals>.<lambda>\  s    wi8=TU r:   )r   rV   r   r   )r   r   r  s    ``r8   zero_numel_check_dimsr  S  sR    yyA~1H!r	Y	

 	IIcNaU	
r:   c                      |(t        ||j                               }t        ||        y t        j                  |j                         dk7   fd       y )Nr   c                        dS )Nz@: Expected reduction dim to be specified for input.numel() == 0.r4   r&  s   r8   re   z%check_argmax_argmin.<locals>.<lambda>h  s    tf\] r:   )r   r   r  rV   rg   r   )r  r   r   s   `  r8   check_argmax_argminr  a  sC    
S$((*-dC.JJLA]	
r:   c                     t        d| |       t        j                  | j                  ||fnd       }t	        | ||      }| j                  |t        j                        S )Nargmaxr   )r  rO   r  r   r  r   rV   r   )r   r   r  r  r   s        r8   argmax_argmin_metar  l  sQ    $,

coSF4PD$T49E>>%u{{>33r:   c                 |    |t         j                  k(  rt         j                  }t        j                  d||||      S )Nr4   r
  )rV   jaggedr  r   )r  r^   r   r   r   s        r8   scalar_tensorr  t  s5    
 ;;
%v* r:   c                    t        || j                         d      }| j                         dk(  rdn| j                  |      }t        j                  |dk\         t        j                  ||k  d        t        | j                        }t        |      dkD  r|||<   | j                  |      | j                  |t        j                        fS )NTr  r   r3   c                       y)Nzk not in range for dimensionr4   r4   r:   r8   re   ztopk_meta.<locals>.<lambda>  rm   r:   r   )
r   r   r   rV   rg   r   r   r   r   r   )r   rH  r   largestrY  	sliceSizetopKSizes          r8   	topk_metar    s     dhhjd
;CXXZ1_$))C.I	LLa	LLi!GHDJJH
8}q>>(#T^^HEKK^%PPPr:   c                     ||t        d      |j                         }| j                         }	t        j                  ||	j                  |	j
                  |	j                        S )Nz;segment_reduce(): Either lengths or offsets must be defined)r^   r   r   )r   r  rV   r   r^   r   r   )
rL  r   r{  rk  rv  rw  rx  rz  data_contiggrad_contigs
             r8   meta__segment_reduce_backwardr    sh    
 7?I
 	
 //#K//#K!!!!	 r:   c                    ddl m} t        | j                         d      | j                         dkD  r| j	                        nd}t        j                   ||dk\  ||k        fd       t        | j                  d  | j                  dz   d  z         }|r%| j                         dkD  r|j                  d       | j                  |      | j                  |t
        j                        fS )Nr   )r;  Tr  r3   c                      d  S )Nz9kthvalue(): selected number k out of range for dimension r4   r   s   r8   re   zkthvalue_meta.<locals>.<lambda>  s    KC5Q r:   r   )r$  r;  r   r   r   rV   rg   r   r   r/  r   r   )r   rH  r   r  r;  dimSizer   s     `    r8   kthvalue_metar    s     >
dhhjd
;C $
QdiinAG	LLQW%Q
 DS!DJJsQwy$99:E488:>S!>>% $..ekk."JJJr:   c                    | | n|}t        j                  |j                         dk(  d        |j                         }| (t        j                  | j                         |k(  d        |(t        j                  |j                         |k(  d        t        j                  |j                         |k(  d        t        j                  |j                         |k(  d        t        j                  |j                         dk(  d        t        j                  |j	                         |d   |d	   z  d
z  k(  d        y )Nr   c                       yN r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   c                       yr  r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   r   r3   r	  c                       yr  r4   r4   r:   r8   re   z(checkLSTMBackwardSizes.<locals>.<lambda>  rm   r:   )rV   rg   r   r   r   )grad_hygrad_cyr  r  r  defined_gradexp_sizes          r8   checkLSTMBackwardSizesr	    s    %17wL	LL!!#q(*5  "HW\\^x/<W\\^x/<	LLh&
3	LLh&
3	LLA%z2	LL"hqkHQK&?!&CCZPr:   c                     | |yt        | ||||       t        j                  |t              }t        j                  |t              }|r|j	                  dd      nd }|||fS )NNNNr   r   F)r  )r	  rV   r   legacy_contiguous_memory_formatr  )	r	  r	  r  r  r  has_bias
grad_gatesgrad_cxr  s	            r8   #_thnn_fused_lstm_cell_backward_implr	    sl    7?7GRY?!!!@J r1PQG4<
q%0$Iw	))r:   c                    d }d }d }|d   r|j                  | j                               }|d   s|d   rQ|j                  |j                  d      | j                  d      f      }|j                  |j                  d            }|||fS )Nr   r3   r   r   r  )r  r  r  r  r  grad_weightr  s          r8   linear_backwardr	    s    JKI1~!++FKKM:
1~Q",,l.?.?.CV[[QS_-UV **<+<+<R+@A	Y//r:   c                     t         j                        dkD  r j                  d   ||z  z  dk(  st        d j                   d|       d  fd} j                  d   ||z  z  } j                  d   |z  } j                  d	   |z  }g  j                  d d |||} j                  |      }|j	                   |       
      }|S )Nr   r5  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 b    t         j                  j                  |       t         j                  k(  S rD   r  r  s    r8   r  z,meta_pixel_shuffle.<locals>.is_channels_last  s$    ""88=ATATTTr:   c                  2           r.t              dk(  rt        j                  S t        j                  S j	                  t        j                        rt        j                  S j	                  t        j
                        rt        j
                  S y )Nr   r   )r   rV   r   r  rd  preserve_format)r  r   s   r8   r  z.meta_pixel_shuffle.<locals>.pick_memory_format  s|    D!4 F*...***e.E.EF***e.C.CD((( Er:   r  r   r   )r   r   r   r   r'  )	r   upscale_factorr  r  HrWrr   r   r  s	   `       @r8   meta_pixel_shuffler	    s     	DJJ!

2.>2Q RVW W5djj\AXYgXhi
 	
U	) 	

2>N:;A	B.	(B	B.	(B-$**Sb/-1-b-"-I
..
#C
&&13&
4CJr:   c                 X   | j                  | j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|j                  |j                        }|||||||fS rD   r  )r   weight0weight1weight2weight3r  cx_tmpr   hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   r]  r  r  r  r  r  r  r  r  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_bs                                r8   mkldnn_rnn_layer_backwardr,	    s    4 __U[[)FmmCII&Gv||,G.G.Gw}}-F7GVVWgEEr:   )	out_int32r   c                    t        j                  | |rt         j                  nt         j                  t         j                        S )Nr^   r   )rV   r   rC  r   r   r   
boundariesr-	  r   s       r8   meta_bucketizer2	  %  s2     &ekkEKK-- r:   r1	  r-	  r   c                r    |j                  d|rt        j                        S t        j                        S )Nr4   r   )r   rV   rC  r   r0	  s       r8   meta_bucketize_scalarr4	  /  s>     
&ekk    ,1KK    r:   c                     dt               dk(  r't        j                   j                          fd       t               dk(  r% j                         rt	        j
                  d       t        j                  t        t              fd       t        j                  dkD  fd       t        j                  t        t              fd	       t        j                  t        t              fd
       t        j                  k\  fd       t        j                   j                   j                        S )Nzhistc()r  c                  $    d j                    dS )Nz%"histogram_cpu" not implemented for 'r  r   r  s   r8   re   zmeta_histc.<locals>.<lambda>D  s    =ekk]!L r:   r   z%_histc_cuda with floating point inputc                  $     dt                S )Nz#: argument 'bins' must be int, not r  binsr  s   r8   re   zmeta_histc.<locals>.<lambda>J  s    7)>tDzlK r:   r   c                       d  S )Nz: bins must be > 0, but got r4   r8	  s   r8   re   zmeta_histc.<locals>.<lambda>L  s    gY.J4&#Q r:   c                  $      dt               S )Nz%: argument 'min' must be Number, not r  )r  r   s   r8   re   zmeta_histc.<locals>.<lambda>O      7)@cL r:   c                  $      dt               S )Nz%: argument 'max' must be Number, not r  )r  r  s   r8   re   zmeta_histc.<locals>.<lambda>S  r<	  r:   c                        dS )Nz: max must be larger than minr4   )r  s   r8   re   zmeta_histc.<locals>.<lambda>U  s    y0M%N r:   r   )r   rV   rg   r   rO   r  rp   r   r   r   r   r^   )r   r9	  r   r  r  s   ````@r8   
meta_histcr?	  =  s     G5U"##%L	
 5V#(?(?(A%%&MN	LL4!K 
LLQR	LL3L 
LL3L 
LLNO;;tELLDDr:   c                 B    t         j                         |d      }t        j                   j	                         dk7  xs# t        d  j                         dd  D               fd        j                  |      j                  t        j                               S )Nr   r~  r   c              3   &   K   | ]	  }|d kD    ywrt  r4   )rr   r   s     r8   rt   z,meta_upsample_bimode2d_aa.<locals>.<genexpr>g  s     !Ht$(!Hrv  r3   c                  *    d j                          S r  r   r  s   r8   re   z+meta_upsample_bimode2d_aa.<locals>.<lambda>h  r}  r:   r   )
rz  r   rV   rg   r   r  r   r'  rO   r   )r   r{  r  r  r  r  s   `     r8   meta_upsample_bimode2d_aarC	  Y  s     -

kA 
LLHc!Huzz|AB7G!HH_ ??+,//11%8 0  r:   c                 T    t        ||d      t        j                   j                  dk(   fd       t	        d      D ]0  t        j                   j
                        k(   fd       2  j                  |      j                  t        j                               S )Nr   r~  r	  c                  "    d j                    S r  r  r  s   r8   re   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>}  r  r:   c            
      D    d d     d dj                         S )NzD
Expected grad_output to have the same shape as output; output.size(r  z
but got grad_output_size(r   r  s   r8   re   z4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>  s@     DDE3dK[\]K^J_ `D!1!1!!4 59 r:   r   )
rz  rV   rg   r   r   r   r   r'  rO   r   )r  r{  rQ  r  r  r  r  r   s   `     @@r8   "meta_upsample_bimode2d_aa_backwardrG	  o  s     -K! 
LLAk 1X 
a $4Q$779	

   ,//11+> 0  r:   c                 P   t        j                  |j                         dk(  d        t        j                  |j                         dk(  d        t        j                  |j                  j                  d        t        j                  |j                  j                  d        y )Nr3   c                       y)Nz%found_inf must be a 1-element tensor.r4   r4   r:   r8   re   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rm   r:   c                       y)Nz%inv_scale must be a 1-element tensor.r4   r4   r:   r8   re   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rm   r:   c                       y)Nz!found_inf must be a float tensor.r4   r4   r:   r8   re   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rm   r:   c                       y)Nz!inv_scale must be a float tensor.r4   r4   r:   r8   re   z<_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>  rm   r:   )rV   rg   r   r^   r   )r   r  	inv_scales      r8   *_amp_foreach_non_finite_check_and_unscale_rN	    s|    	LLQ O 
LLQ O 
LL))3 
LL))3r:   c                 ,    t        j                  |       S rD   r  )r   nanposinfneginfs       r8   
nan_to_numrS	    rv  r:   c                    | j                   t        j                  t        j                  t        j                  t        j
                  hv rt        d| j                    d      | j                  }t        ||      }t        ||      }||k(  r| S t        | j                               }t        | j                               }||   ||   c||<   ||<   ||   ||   c||<   ||<   | j                  ||       | S )Nz>torch.transpose_: in-place transposition is not supported for z layout)r   rV   r  
sparse_cscr  
sparse_bscr   r   r   r   r   r   r   )r   dim0rf  ndimsr   r   s         r8   r  r    s    {{	  LT[[MY`a
 	
 IIE$&D$&Dt|		D$++- F!'vd|F4L&,!$ZdDJT
T6"Kr:   c                    | j                   }| j                  r<| j                         }| j                         }|dk  r|dk(  s4t	        d| d| d      | j                         dkD  rt	        d| d      t        | d|dk  rd      S d      S )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is rM  r3   )r   r  r  r  r   r   r  )r   rX	  r  r  s       r8   t_rZ	    s    IIE~~__&
NN$	aIN %,l9+=NP 
 88:> HqQ  dAEAIq55155r:   )r-	  r   sidesorterc                ^    t        j                  t         j                        dk  xs  j                  d d j                  d d k(   fd       t        j                  d u xs  j                  j                  k(   fd       t        j                  |dk7  xs | d        |rt         j                  nt         j
                  }t        t         j                        r&t        j                  |t         j                        S t        j                  d| j                  	      S )
Nr3   r   c                  `    dt        j                         dt         j                         S )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor r   r   )r   sorted_sequences   r8   re   z#meta_searchsorted.<locals>.<lambda>  s8    3378M8M3N2O P""&tzz"2!35 r:   c                  l    dt         j                         dt        j                         S g  S )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor r_	  )r`	  r\	  s   r8   re   z#meta_searchsorted.<locals>.<lambda>  sO    ##'(=(=#>"??V%+%7tFLL!@B  >@@B r:   r   c                       y)Nzetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truer4   r4   r:   r8   re   z#meta_searchsorted.<locals>.<lambda>  rm   r:   r/	  r4   r  )rV   rg   r   r   rC  r   rp   r   r   r   r   r   )r`	  r   r-	  r   r[	  r\	  r^   s   ``   ` r8   meta_searchsortedrc	    s     
LLO!!"a' 	9  "%CR8	
	 
LL$?///6<<?	
 
LL#e)	$ %EKK%++E$%U-D-D
 	
 {{2U?3I3IJJr:   c                      t        j                   t         j                  t         j                  t         j                  fv fd       y )Nc                      d  S )Nz/Unsupported input type encountered for isin(): r4   r   s   r8   re   z3_check_for_unsupported_isin_dtype.<locals>.<lambda>  s    A%I r:   )rV   rg   r  
complex128	complex64r   s   `r8   !_check_for_unsupported_isin_dtyperh	  
  s/    	LLejj%"2"2EOODDIr:   c                 J    | j                  || j                  d      f      }|S )Nr   r  )r  r   num_weightsr  r  r	  s         r8   meta_embedding_dense_backwardrk	    s*     ''k6F6Fr6J(KLKr:   c                 j    |	rt         j                  | ||||||||
|
      S t        | ||||||||
|
      S rD   )r-   _embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)rL  r   rw  r  r  maximum_indicesrj	  r  r]  r  r  r  s               r8   meta_embedding_bag_backwardrp	    se     22
 	
 1
 	
r:   c
                 N    t        j                   j                  t         j                  t         j                  t         j
                  t         j                  fv  fd       |t        k(  rt        j                  |d u        j                  | j                  d      f      }
|
S )Nc                  "    d j                    S )Nz$Unsupported input type encountered: r   )rL  s   r8   re   z3meta_embedding_bag_dense_backward.<locals>.<lambda>W  s    6tzzlC r:   r3   )
rV   rg   r^   ra  rb  r`  float64r  r   r   )rL  r   r  r  ro	  rj	  r  r]  r  r  index_grad_weights   `          r8   rn	  rn	  H  sv     
LL

u}}ennemmU]]SSC x_D01TYYq\'BCr:   c                    | j                  d      }t        j                  |t        k(  d        t        j                  | j	                         dk(         t        j                  |j	                         dk(         |j                  d      }t        j                  |j	                         dk(         t        j                  |j                  d      |k(         | j                  |f      }	|	S )Nr3   c                       y)NzHembedding_bag_backward: per_sample_weights only supported for mode='sum'r4   r4   r:   r8   re   z@meta_embedding_bag_per_sample_weights_backward.<locals>.<lambda>l  rm   r:   r   r   )r   rV   rg   r  r   r   )
rL  r9  r   rw  r  r]  r  embedding_featuresrf  r   s
             r8   .meta_embedding_bag_per_sample_weights_backwardrx	  _  s     1	LLZ 
LLq!	LL!#$,,q/K	LL"#	LLQ#556^^[N+FMr:   )assume_uniqueinvertc                   t        j                  t        | t              xs t        |t              d        t        | t              s!t        j                  | |j
                        } t        |t              s!t        j                  || j
                        }t        | j                         t        |j                         t        j                  | t         j                        S )Nc                       y)Nz<At least one of elements and test_elements must be a Tensor.r4   r4   r:   r8   re   zmeta_isin.<locals>.<lambda>|  rm   r:   r  r   )
rV   rg   rp   r   r0  r   rh	  r^   r   r  )elementstest_elementsry	  rz	  s       r8   	meta_isinr	  w  s     
LL8V$I
=&(IN h'<<1E1EFmV,]8??K%hnn5%m&9&9:HEJJ77r:   r  c                     t        j                  | dk\  d        t        |t        j                        \  }}t        j
                  ||      S )Nr   c                       y)Nz,polygamma(n, x) does not support negative n.r4   r4   r:   r8   re   z meta_polygamma.<locals>.<lambda>  rm   r:   r  r   )rV   rg   r   r   r  r   )r  r   rR   rS   s       r8   meta_polygammar	    sF     
LLaOP(;HHOA| D55r:   c                     t        d      )Nz.Tensor.item() cannot be called on meta tensors)r&  r   s    r8   meta_local_scalar_denser	    s    
G
HHr:   c                 ,    t        j                  |       S rD   r  r   s    r8   silur	    rv  r:   c                 l    t        | t        j                        \  }}t        j                  | |      S r  )r   r   r  rV   r   )r   rR   rS   s      r8   sigmoidr	    s3     );HHOA| D55r:   c                 R   | j                         dk(  }|j                         dk(  }|r|r4|j                  d      | j                  d      |j                  d      g}n"t        j                  |j                  d      |j                  d      k(  d        | j                  d      |j                  d      g}n|r[t        j                  |j                  d      | j                  d      k(  d        | j                  d      |j                  d      g}njt        j                  | j                  d      |j                  d      k(  d        | j                  d      | j                  d      |j                  d      g}|xs | j                  }t        j
                  j                  rZd|j                  z  }|d   |z   dz
  |z  |z  }||k(  r|d   |z  |dg}	n|dg}	t        j                  ||	|| j                  	      }
|
S t        j                  ||| j                  	      }
|
S )
Nr   r   r3   c                       yNz matrix batch sizes have to matchr4   r4   r:   r8   re   z2_create_grouped_mm_output_tensor.<locals>.<lambda>  rm   r:   r   c                       yr	  r4   r4   r:   r8   re   z2_create_grouped_mm_output_tensor.<locals>.<lambda>  rm   r:   c                       y)Nzbatched dimension has to matchr4   r4   r:   r8   re   z2_create_grouped_mm_output_tensor.<locals>.<lambda>  rm   r:   r(  r  )r   r   rV   rg   r^   r  r   itemsizer(  r   r   )rH  rJ  offsr=  
mat1_is_2d
mat2_is_2dr  	alignmentsize_paddedr  r   s              r8    _create_grouped_mm_output_tensorr	    s   qJqJ		!diilDIIaLAHLL		!		!,.X 		!diim4HLL		!		!,.X 		!diil3H LL		!		!,.V 		!diilDIIbMBH'TZZI}}),,,	|i/!3	AIM#"1+3[!DJ%q)J!!j	$++

 J kk()DKKHJr:   mat_amat_br	  c	                     t        j                  d u d u k(  d        d uxr d u}	|	rt         j                  }
t         j                  j                  rYt         j
                  j                         r;dt         j
                  j                  d      j                  v rt         j                  }
t        j                   j                  |
k(  xr j                  |
k(   fd       nTt        j                   j                  t         j                  k(  xr j                  t         j                  k(   fd       t        j                   j                         dv xr j                         dv  fd        j                         dk(  }j                         dk(  }|r|s8t        j                   j                  d	      j                  d
      k(  d        |	rDd }d }t        j                   |        fd       t        j                   |      fd       d } |d         |d       t        j                  j                  t         j                  k(  xr j                  t         j                  k(  xs< j                  t         j                  k(  xr j                  t         j                  k(  fd       j                  t         j                  k(  xr j                  t         j                  k(  dfd	}|r|rj                   d   nd} |d d|        |dd|       t        j                  |d u d        |s|r}t        j                  d u fd       xt        j                  j                         dk(  fd       t        j                  j                  t         j"                  k(  fd       nt        j                  d u d        t        j                  |d u d        t        j                  |d u xs |t         j                  k(  d        t%         |      S ) Nc                       y)Nz,Either both scale factors are given, or noner4   r4   r:   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>  rm   r:   gfx94r   c                  >    d j                    dj                    dS )Nz5Expected inputs of E4M3 FP8 type but got mat_a.dtype= and mat_b.dtype=r  r   r	  r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>  s%    KEKK=Xijojujuivvwx r:   c                  >    d j                    dj                    dS )Nz1Expected inputs of BF16 type but got mat_a.dtype=r	  r  r   r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>  s%    G}Tefkfqfqerrst r:   )r   r0   c                  L    d j                          dj                          S )Nz3Multiplicands must be 2D or 3D but got mat_a.dim()=z and mat_b.dim()=r   r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>  s'    Eeiik]Rcdidmdmdocpq r:   r   r   r  c                       y)Nz3contraction dimension of mat_a and mat_b must matchr4   r4   r:   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   rm   r:   c                 F    | j                         }|d   dkD  xr |d   dk(  S Nr  r3   r   r  mat
mat_strides     r8   r  z-_meta_grouped_mm_common.<locals>.is_row_major
   s*    Jb>A%=*R.A*==r:   c                 F    | j                         }|d   dk(  xr |d   dkD  S r	  r  r	  s     r8   r  z-_meta_grouped_mm_common.<locals>.is_col_major   s*    Jb>Q&=:b>A+==r:   c                  0    d j                         dd   S )NzNExpected mat_a tensor to be row major in the last two dimensions, got strides r  r  )r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   s#    dejeqeqestvtwexdyz r:   c                  0    d j                         dd   S )NzQExpected mat_b tensor to be column major in the last two dimensions, got strides r  r  )r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   s#    ghmhththvwywzh{g|} r:   c                     j                         dz
  dj                         z  }j                         dz
     dk(  rG   t        dj                  dz
           k\  r%t        j                     |z  dk(   fd       y    dk(  rJdz
     t        dj                           k\  r(t        j                  dz
     |z  dk(   fd       y t        j                  dfd       y )Nr3   r(  r   c                  "    d d  d     dS )Nr   stride along % dim to be multiple of 16 bytes, got r  r4   end_dimmat_namer	  s   r8   re   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>$   s)    )H:^G9Dijtu|j}i~~  A r:   c                  .    d d dz
   d dz
      dS )Nr  r	  r3   r	  r  r4   r	  s   r8   re   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>+   sK    )H:^GaK=Hmnx  zA  DE  zE  oF  nG  GH  I r:   Fc                  *    d d j                    dS )NzInvalid strides/sizes, got z for strides and z for sizes.r9  r	  s   r8   re   zF_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>0   s!    5j\ARSVS\S\R]]hi r:   )r   element_sizer   r  r   rV   rg   )r	  r	  r	  r	  r	  s   `` @@r8   check_valid_stridesz4_meta_grouped_mm_common.<locals>.check_valid_strides   s    '')a-#**,,	ZZ\
gk"a'Jw,?3syy1%D
 -
 LL7#i/14 A  A%*Wq[*ASsyy!F
 +
 LL7Q;')3q8 I
 LLir:   r	  r	  c                  >    d j                    dj                    dS )NzhFor FP8 scales must both be float32, or for MXFP8 both scales must be float8_e8m0fnu. Got scale_a.dtype=z and scale_b.dtype=r  r   r  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>=   sT    ~  @G  @M  @M  N  Na  bi  bo  bo  ap  pq  r r:   r3   c                    	
 j                         dk(  rt        j                  j                          fd       r;t        j                  j                         j                         k(   fd       y t        j                  j                         dk(   fd       t        j                  j                  d   j                     z  k(   fd       y t        j                  j                  d      dk(   fd	       t        j                  j                  d   j                  d   k(   fd
       rt        j                  j                  j                  dz
  k(   fd       j                  \  }}d}t        ||z  d      	t        |d      
t        j                  j                  d   k(  xr j                  d   	
z  k(  	
fd       y t        j                  j                         dk(   fd       t        j                  j                  d   j                  dz      k(   fd       y )Nr   c                      d  dS )Nr  z to be contiguous.r4   
scale_names   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>H   s    i
|3EF r:   c                  B    d d j                    dj                    S )NzKFor MXFP8, scale must have same number of dimensions as target tensor, but  has mat.ndim= and scale.ndim=r  r	  r}  r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>Q   sZ    "mnxmy  zH  IL  IQ  IQ  HR  Rb  ch  cm  cm  bn  !o r:   r3   c                  2    d d j                          dS )Nr  z to be 1D tensor, but got 	D tensor.r   r}  r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>V   #    )J<7QRWR[R[R]Q^^g h r:   r   c                  V    d d j                      z   dj                   d    dS )Nr  z	 to have r  r   z
 elements.r9  )r	  r}  scale_multiplierr	  
scaled_dims   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>Z   sW    )J<y:AVYiAi@jjyz  {F  {F  GH  {I  zJ  JT  !U r:   r   c                      d  dS )Nr  z( to be contiguous in the last dimension.r4   r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>_   s    i
|3[\ r:   c                  P    d d j                   d    dj                   d    dS )Nr  z batch dimension to be r   , got r  r9  r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>c   s6    i
|3J399UV<.X^_d_j_jkl_m^nnop r:   c                  B    d d j                    dj                    S )Nz0For MXFP8, 3d tensor should have 2d scales, but r	  r	  r  r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>j   sF    "RS]R^^lmpmumulv  wG  HM  HR  HR  GS  !T r:   r)  r	  rA  c                  N    dj                    d  dz   dj                    S )NzFor MXFP8, expected mat.shape=z to have scale shape of (,z), but got r9  )G	blocked_K	blocked_Nr	  r}  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>t   sQ    "@Kdefdgghiru~i~h  @K  LQ  LW  LW  KX  !Y r:   c                  2    d d j                          dS )Nr  z to be 2D tensor, but got r	  r   r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>y   r	  r:   c                  V    d d j                   dz       dj                   d    dS )Nr  z non-batch dimension to be r3   r	  r  r9  )r	  r}  r	  r	  s   r8   re   z>_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>}   sT    )J<7RSVS\S\]^ak]kSlRmmstytt  AB  uC  tD  DE  !F r:   )r   rV   rg   rd  r   r   r   r?   )r	  r}  r	  r	  r	  rs  r  r  r	  r	  r	  is_mxfp8s   `````   @@@r8   check_scalez,_meta_grouped_mm_common.<locals>.check_scaleD   s   wwyA~'')F LL		swwy0 o
 LL		q(h LLA#))J*?BR*RR U
 LL$)\ KKNciil2p LL

chhl2 T "iiGAq!!#J (Z ;I (C 0ILLA!+WA)iBW0W Y
 LL		q(h LLA#))A
N*CC Fr:   r  r  c                       y)Nz:Scale result tensor provided, but it is not supported yet.r4   r4   r:   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   rm   r:   c                  N    d j                          dj                          dS )Nz/Offsets tensor not provided, but is needed for zD/zD multiplicand layouts.r   r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   s*    Eeiik]RTUZU^U^U`Taaxy r:   c                  ,    d j                          dS )Nz.Offsets tensor must be 1D, but got offs.dim()=r  r   r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   s    HTUV r:   c                  $    d j                    dS )Nz7Offsets tensor must be integer (int32) tensor, but got r  r   r	  s   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   s    QRVR\R\Q]]^_ r:   c                       y)NzJOffsets tensor provided, but is not needed for 3D/3D multiplicand layouts.r4   r4   r:   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   rm   r:   c                       y)Nz2Bias tensor provided, but it is not supported yet.r4   r4   r:   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   rm   r:   c                       y)Nz4If output dtype provided, it must be torch.bfloat16.r4   r4   r:   r8   re   z)_meta_grouped_mm_common.<locals>.<lambda>   rm   r:   r  )rV   rg   rc  r  r  r   r~  get_device_propertiesgcnArchNamer  r^   rb  r   r   r`  r  r   rC  r	  )r	  r	  r  r  r	  r;  r  r=  r  scaled	fp8_dtypemat_a_is_2dmat_b_is_2dr  r  r	  r	  r	  r	  s   `````             @r8   _meta_grouped_mm_commonr	    s    
LL	Dgo.> D 8WD%8F ''	MM

'')5::;;A>JJJ--IKK9$A	)Ax	

 	KK5>>)KekkU^^.Kt	

 
LL		v7%))+"7q
 ))+"K))+"KkJJrNejjn,I	

 	>	> 	z	
 	}	

0 ''w2]]emm+N0N !5!55 :MMU%9%99 r	
 MMU111 6!5!55 	
:	z "-++DJJqMST 	 	Iwq2BCIwq2BCD P	

 ky	
 LL
aV LL

ekk)_
 	DL`	

 
LLD
 
LLT8Y%..8F
 ,E5$	JJr:   c           
      (    t        | |d d ||d |      S )N)r  r  r	  r;  r  r=  )r	  )r	  r	  r	  r;  r=  s        r8   meta_grouped_mmr	     s)     #	 	r:   c	                 R    |xs t         j                  }t        | ||||||||	      S )N)r  r  r	  r;  r  r=  r  )rV   rb  r	  )	r	  r	  r  r  r	  r;  r  r=  r  s	            r8   meta_scaled_grouped_mmr	     s:     +U^^I"!%
 
r:   c                 6   t        |      t        d      k(  r/| D ]*  }t        j                  |j                         dkD  d        , g }| D ]K  }||n|j                  }|j
                  rt        |      }|j                  |j                  d|             M |S )Ninfr   c                       y)Nz:_foreach_norm cannot compute infinity norm on empty tensorr4   r4   r:   r8   re   z#meta_foreach_norm.<locals>.<lambda>   rm   r:   r4   r   )	rZ   rV   rg   r   r^   r   r   r   r   )tensorsordr^   r  resultsr=  s         r8   meta_foreach_normr	     s    SzU5\! 	ALL	AT	
 G 9".EAGG	0;Iq{{2Y{78	9
 Nr:   r=   half_to_floatc                 P   |rE| j                   t        j                  t        j                  fvrt	        d| j                    d      t        j                  | t
        j                  j                        \  }}|s|n|}t        j                  | |t        j                        }|S )Nz%half_to_float is True but x.dtype is z, expected half or bfloat16r  r/	  )r^   rV   rX   rb  r   rO   r   r   rP   r   r   )r=   r   r	  computation_dtyperS   r  s         r8   softmaxr	     s     775::u~~66 7y@[\  ',&>&>	uDDLL'#| (5<:KL


1L@W@W
XCJr:   c           	        	
 t        j                  t              dz  dk(  fd       | j                  t              
t              dz  }
|z
  	t        j                  
|k\  
fd       t	        d D              r| }t        	
      D ]t  d
z
  dz
  z     dk  r*|j                      |j                        z         }dz      dk  sL|j                  d|j                     dz      z         }v |j                         S t        d 	       }t        |      D ]^  t              dz   dz  z
  	z         z   dz      z   }t        j                  |dk\  	fd       |j                  |       ` t        j                  || j                  | j                  | j                  t        |             S )	Nr   r   c                       dt                S )Nz1Length of pad must be even but instead it equals r  rX  s   r8   re   z'_constant_pad_nd_meta.<locals>.<lambda>!  s    CCH:N r:   c                  (    dt               d  dS )Nz`Length of pad should be no more than twice the number of dimensions of the input. Pad length is z while the input has z dimensions.r  )l_inprY  s   r8   re   z'_constant_pad_nd_meta.<locals>.<lambda>!  s"     225c(;P' r:   c              3   ^   K   | ]%  }t        |t        j                        xr |d k   ' ywrt  )rp   rO   IntWithoutSymInt)rr   rH  s     r8   rt   z(_constant_pad_nd_meta.<locals>.<genexpr>!  s)     
I:a//0;Q!V;
Is   +-r3   c            	      F    d z       d    ddz       d z    d	S )NzThe input size z, plus negative padding r   r3   zG resulted in a negative output size, which is invalid. Check dimension z of your input.r4   )r   r   l_diffrY  pad_idxs   r8   re   z'_constant_pad_nd_meta.<locals>.<lambda>)!  sG    ok&1*&=%>>V7|nE#gk"2!3 4117!OM r:   )r^   r   r   r   )rV   rg   r   r   r  r   narrowr   r   r   r   r^   r   r   r   )r   rY  r+  l_padc_input	new_shapenew_dimr   r   r	  r	  r	  s    `     @@@@@r8   _constant_pad_nd_metar	   !  s    
LLC1N
 ++KEHMEU]F	LL	 
IS
IIvu% 	TA519q=)G7|a!..G}gmmA&6W&E 7Q;!#!..Aw}}Q/?#gPQkBR/RS	T }}[&)*I5\ 	"c(q1uk*fqj)CL83w{;KKqLM	
 	!	" ;;kk||))+E2 r:   r  r  r  c                 D   | j                         dk7  rt        d| j                          d      | j                  }|j                  }|j                  dk(  r|d   f}n$|j                  dk(  r|d   |d   f}n
g ||d   }| j                  }| j                  ||      S )Nr   z'weight' must be 2-D, got z-Dr   r3   r   )r   r   r   r   r^   r   )	r9  r   r  r  r  weight_shapeindices_shaper   r=  s	            r8   	embeddingr 
  8!  s     zz|q9&**,rJKK<<LMMM||q&21o%7			"1%|A7	5m5\!_5	IIY77r:   max_lengthspadding_valuec                    t        |      dk7  rt        dt        |       d      t        |      dk7  rt        dt        |       d      |d   j                  d   dz
  }|d   }||g| j                  dd  }| j                  |      S )Nr3   z&Only one jagged dim is supported, got z offsetsz max_lengthsr   )r   r   r   r   )r   rw  r
  r
  r  rx  rg  s          r8   $meta__jagged_to_padded_dense_forwardr
  Q!  s     7|q4S\N(K
 	
 ;14S5E4FlS
 	
 	
aAAAq,6<<+,LL))r:   c                 B    t        |       t               d               }|S )Nc                 8    t        | t        j                        S r  rT   r   r  r  s    r8   _fz)_create_unary_float_meta_func.<locals>._fi!  s      =JJ
 	
r:   rK   r$   funcr
  s     r8   _create_unary_float_meta_funcr
  h!  *    4]
  

 Ir:   c                    | j                   s|j                   s|j                   rt        d      | j                         dk(  r,| j                  | j                        | j                  d      fS | j                  d      }| j                  d      }|j                  d      }| j                  |||      }|
r1|r| j                  |||      }||fS | j                  ||||      }||fS | j                  d      }||fS )NzP_native_multi_head_attention fake implementation does not support nested tensorsr   r3   )	is_nestedr  r   r   r   r   )r;  r   r+  	embed_dimr  
qkv_weightqkv_biasproj_weight	proj_biasr  need_weightsaverage_attn_weights	mask_typer  T
output_dimr   attn_weightss                     r8    native_multi_head_attention_faker
  t!  s      #--5??!^
 	
 {{},eooa.@AA

1A

1A !!!$J__Q:.F !??1a3L L!!	 !??1h1=L L!! q)L!!r:   c                 B    t        |       t               d               }|S )Nc                 :    t        | |t        j                        S r  r
  r<   s     r8   r
  z*_create_binary_float_meta_func.<locals>._f!  s      q!@!M!M
 	
r:   r	
  r

  s     r8   _create_binary_float_meta_funcr
  !  r
  r:   c                      t                fd       } j                   d}||_         t        t        t        |            |      }|S )Nc                 `     | g|i |}t        | j                  |j                         | S rD   r*  )r   rQ   r  r   rF   s       r8   _fnz#_register_inplace_meta.<locals>._fn!  s.    '''

CII6r:   rR   )r   r{   rK   getattrr-   )rF   r!
  inplace_names   `  r8   _register_inplace_metar$
  !  sO    
2Y 
 kk]!$LCL
4-l3
4S
9CJr:   c                 f    t        j                   j                  j                  k(   fd        g}t        t              rQj
                  dk7  r1t        j                   j                  j                  k(   fd       |j                         t        |dt        j                  iS )Nc                  <    dj                    d j                    S )Nr  z for `end`, but got dtype r   )r|   r}   s   r8   re   zlerp.<locals>.<lambda>!  s    /%++.HT r:   r   c                  <    d j                    dj                    S )Nr  z for `weight`, but got dtype r   )r}   r9  s   r8   re   zlerp.<locals>.<lambda>!  s!    /%++6STZT`T`Sab r:   rL   )
rV   rg   r^   rp   r   r   r   rT   r   rP   )r}   r|   r9  rQ   s   ``` r8   lerpr(
  !  s     
LLsyy T 3<D&*%;;!LLv||+b 	F	=EE r:   )r+  c                <    t        | ||t        j                        S r  r  r   tensor1tensor2r+  s       r8   addcmulr-
  !  s!     w0O0W0W r:   c                    t        j                  t        j                  |j                        xr t        j                  |j                         d        t        | ||t        j                        S )Nc                       y)N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r4   r4   r:   r8   re   zaddcdiv.<locals>.<lambda>!  rm   r:   r  )rV   rg   rO   r>  r^   rT   r   rP   r*
  s       r8   addcdivr0
  !  sb     
LL""7==1 6&&w}}5	
		
  w0O0W0W r:   c                     i } dD ]  }t         |   }|D ]  }|| vs||   | |<    ! | j                         D ]  \  }}t        |t        j                  j
                        r,t        |t              st        dt        |              |j                  t        j                  j                  j                        |       t        j                  j                  |j                         d      r|t         d   v st        | d      |j                   r|j                         dv rd|j                         v rt"        j%                  ||       d|j                         v rt&        j%                  ||       Hd	|j                         v rt(        j%                  ||       rd
|j                         v rt*        j%                  ||       t,        j%                  ||        y )N)r   post_autogradpre_autogradz$op_overload must be OpOverload, got CompositeImplicitAutogradr   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsrp   rV   _opsHigherOrderOperatorr   r   rz   py_impl_CDispatchKeyr/   %_dispatch_has_kernel_for_dispatch_keyr  r&  is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tabletypregistryopoop_overloadrF   s         r8   activate_metarO
  "  s    9 9-c2 	9C--+3C=#C(	99 /446 9NR
 k5::#A#AB+z2 6tK7H6IJ  	7EHH00556r:8899 ;
 8@@""m $; ; 
    	 [--//BGGUWXK,,..?DD[RTU{//11BGGUWX+"2"2"44EJJ 8<<["Ms9Nr:   )Fr
	  rD   )NNNFr   r3   r   r  )Tr  )r  )r  T)FF)TT)r_  )FTN)TFF)TF)r   )g      ?N)r@   str)r4   r  r  F)r4   r  FTN)Fr   FNFr   )NF)r   F)g      ?gUUUUUU?FN)NNNNN)r   NNr3   )NNF)        FFN)NrQ
  FFN)rQ
  FNN)NrQ
  FNN)rQ
  FN)FN)FNNNN)NNNF)NNNNF)Nr   FNN)NNNN)r   TT)NNr   N)d   r   r   )r   )r   N)r   FF)rQ
  )NTTN(  r  collections.abcr   r   enumr   	functoolsr   typingr   typing_extensionsr   rV   torch._prims_commonr  rO   r	   r
   r   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   torch._prims_common.wrappersr    r!   r"   r#   r$   r  r%   r&   torch.fx.experimentalr'   rd  torch.nn.functionalr(   r)   torch.utilsr*   rH   r+   r,   opsr-   libraryLibraryrI
  r   r  r  r  r9   r?   rK   rT   r`   ri   linspacelogspacer  r   taker)  r   r   r   r   cummaxcumminr   r   r   r   r   r  r   _fft_c2cr   r   r   _fft_r2cr  randpermgenerator_outr	  r   r  randintr  r  low_outr  randr  _fft_c2rr!  r  r-  r4  
unsqueeze_r8  _sparse_semi_structured_linearrP
  r^   rG  _sparse_semi_structured_mmrT  _sparse_semi_structured_addmmrX  _cslt_sparse_mmrh  index_reducero  index_reduce_rq  index_selectru  segment_reducer  r  	unary_outr  r   r  r   r  r  r  r  r  _assert_asyncr  msgr  _printr  _make_dep_tokenr  r  _functional_sym_constrain_ranger  r  (_functional_sym_constrain_range_for_sizer  _functional_assert_asyncr  r   r  r   r  r  r  r  _linalg_eighr  r  _linalg_eigvalslinalg_eigvalsr  
linalg_eigr  r  r  r	  r  r  r  r*  linalg_inv_exr-  linalg_ldl_factor_exrf   r3  linalg_ldl_solverB  	linalg_lurI  linalg_lu_factor_exrM  linalg_lu_solverV  	lu_unpackr\  re  	linalg_qrrl  rp  rm  _linalg_svdry  r?  r
  r  r  linalg_solve_triangularr  r  r  _linalg_detr  r  r  r  reflection_pad1dr  replication_pad1dr  r   reflection_pad1d_backwardr  replication_pad1d_backwardr  r  reflection_pad2dr  replication_pad2dr  _weight_norm_interface_backwardr%  reflection_pad2d_backwardr  replication_pad2d_backwardr,  r:  reflection_pad3dr<  replication_pad3dr?  reflection_pad3d_backwardreplication_pad3d_backwardrG  _pdist_forwardrZ   rK  _pdist_backwardrQ  baddbmmrj  	bernoullirm  
bernoulli_rp  rH  rs  poissonru  _fused_moving_avg_obs_fq_helperr  mmr  r  r   r  r  miopen_batch_normr  convolutionr  r@
  _has_mkldnnrD
  r  _convolution_pointwiser  _linear_pointwiser  has_mklrF
  r  _mkl_linearr  rG
  r  qconv2d_pointwiseqconv_pointwiser0  r  binarybinary_tensorr  qlinear_pointwiser  r  linear_dynamic_fp16linear_relu_dynamic_fp16r  rH
  r   
max_pool2dr  int4mm_packed_weight_cpur  r   
avg_pool2drE  rJ  avg_pool2d_backwardrR  
avg_pool3drl  avg_pool3d_backwardrw  _adaptive_avg_pool2dr|  _adaptive_avg_pool3dr  _adaptive_avg_pool2d_backwardr  _adaptive_avg_pool3d_backwardr  r  adaptive_max_pool2dr  r  r  adaptive_max_pool3dr  r  r  repeat_interleaver  rq   r  r  r  r   _unsafe_indexr  convolution_backwardr  addbmmr  randint_liker  _fused_adam__fused_adamw_r  _fused_adamr  _int_mmr!  _convert_weight_to_int4packr,  #_convert_weight_to_int4pack_for_cpur1  _weight_int4pack_mmr;  _weight_int4pack_mm_for_cpurA  rJ  rL  rx  _dyn_quant_pack_4bit_weightr  _dyn_quant_matmul_4bitr  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagr  _embedding_bag_forward_onlyr  r  nansumr  median	nanmedianr  
dim_valuesr]  r   r  logical_not_r  repeatr  zero_r  mul_Scalardiv_logical_and_logical_or_logical_xor_r  add_sub_r  r  subr  rounddecimalsr  r  
__rshift__r  
__lshift__r  zeror  r  r  fillr  relu_r  	_add_relur  rrelu_with_noiser   rrelu_with_noise_functionalr"  rrelu_with_noise_r$  	index_put_unsafe_index_putr(  masked_fill_r,  _masked_scaler/  masked_scatter_r3  masked_scatterr5  masked_scatter_backwardr7  
index_put_r9  rG  bmmrJ  rL  rO  rT  r9  r:  ra  r{  rs  r   max_pool2d_with_indices_backwardr  max_pool2d_with_indicesr  fractional_max_pool2dr  max_pool3d_with_indicesr   max_pool3d_with_indices_backwardr  r  r  r  grid_sampler_2d_backwardr  r  r  r  r  onesr  zerosr  select_scatterr  slice_scatterr  r   r  r   gatherr  r  r  r  r)  r,  scatter_addr1  scatter_add_r4  r+  r*  r+  rk  value_reducer8  scatter_r:  #_scaled_dot_product_flash_attentionrO  rT  r]  #_scaled_dot_product_cudnn_attentionrg  0_scaled_dot_product_fused_attention_overrideablerj  ,_scaled_dot_product_flash_attention_backwardrt  +_scaled_dot_product_flash_attention_for_cpurw  4_scaled_dot_product_flash_attention_for_cpu_backwardr{  *_scaled_dot_product_attention_math_for_mpsr  '_scaled_dot_product_efficient_attentionr  0_scaled_dot_product_efficient_attention_backwardr  ,_scaled_dot_product_cudnn_attention_backwardr  _flash_attention_forwardr  r  _flash_attention_backwardr  _efficient_attention_forwardr  _efficient_attention_backwardSymIntr  r  
_scaled_mmr  rZ  _scaled_mm_v2r]  scatter_reducetwotwo_outra  scatter_reduce_rc  multinomialrh  rl  rz  r  _upsample_nearest_exact1dr  _upsample_nearest_exact2dr  "_upsample_nearest_exact2d_backwardr  _upsample_nearest_exact3dr   r  values_stabler  r  _thnn_fused_lstm_cellr  r  r  r  r  r  r  argminr  r  topkr  _segment_reduce_backwardr  kthvaluer  r   r	  r	  r	  r	  pixel_shuffler	  r,	  	bucketize
Tensor_outr2	  
Scalar_outr4	  histcr?	  _upsample_bilinear2d_aa_upsample_bicubic2d_aarC	   _upsample_bilinear2d_aa_backwardrG	  rN	  rS	  r  rZ	  searchsortedrc	  rh	  embedding_dense_backwardrk	  _embedding_bag_backwardrp	  _embedding_bag_dense_backwardrn	  *_embedding_bag_per_sample_weights_backwardrx	  isinr	  	polygammar	  _local_scalar_denser	  r	  r	  r	  r	  _grouped_mmr	  _scaled_grouped_mmr	  _foreach_normr	  _softmaxr	  constant_pad_ndr	  r 
  _jagged_to_padded_dense_forwardr
  r
  _native_multi_head_attentionr
  r
  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_pr$
  r(
  r-
  r0
  lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialrO
  r4   r:   r8   <module>rW     sM    .    '  # + +  " U     < 7 8 ) T]t_yy~~*/--*?*?PV*W ' %a )X"

8BF#3"4hr2v6F"FG 
3(* t}}-.
 

==7  /7t 		!!499==12'  3' !!))4+<+<+@+@AB%' %  C%$ t%%&I  'I 	[[$++//4;;+>+>P Xy! " !!))4+<+<+@+@ABI  CI3lV $s) 4  %%t}}'8'89:K  ;K $s)  %%t}}'8'89:8
  ;8
v t}}**+"& 3 ,3 t}}$$% **
 &
 $$dll&6&678
 **  9&   $,,"6"678 **  9& 		!!499==12%)$tPT   3 %%t}}'8'89:$Dv $DDI $Dc $DC $D  ;$DN tzz!!" #0	( t&&' ( t223
 "&$(((( ( 4-	(
 4Z( {{T!( 4(V t../
 %)	
  {{T!	 0> t112 	
$())
) ) 	) {{T!) 3)X t##$ $(";<,,;<\\;< 4-;< D=	;<
 {{T!;< ;< ;< ;< ;< %;<| t  (() 	I
	I		I 	I LL		I
 	I 	I 	I *	I t!!))* 	
			 	 LL		
 	 	 	 +	 t  (()' * ' t""**+
 "!! W
 W W d]	 W
 d] W d] W  W  W  W , WF   $(("4"456  7 txx||    $(("4"456  7 txx||  tzz!!"6 #6 tzz~~( (
 t!!))* + t!!%%& ' t{{""# $ t##++, ) -) t''//0, 1, t33;;< =
 t008896 :6& t<<DDE F
 t,,001 2

F 
C 
    F  #  N (,


 !%
$V S C 
 
F 
$ 
 
"  	  	C  !!))4+<+<+H+HIJ]N+ s T  , K" $$,,d.A.A.E.EFGB B6 B  HB  !]N+6  , " Q QF Q t**+) )F )4 )F )  ,) t""#J JF J4 JF J  $J t}})6 )$ )6 )  ) t$$%)6 )$ )6 )  &) t&&../&  T  0" 	$$,,d.M.M.Q.QR .f .6 .f . .d t!!))*&   + ))1143L3L3P3PQRT8V$ 	
  	
 666!" % S& %%--t/D/D/H/HIJ ''' '
 ' '  K'T &&(:(:;<S#s/3 f  fff>T8U   =6 ((00$2J2J2N2NOPT8V$ 	     	 
 666!"  % Q F $$,,d.B.B.F.FGH 444 4
 4 4 4  I4n t~~S#s 	$$$ $ 	$
 666!"$  $P tTz!2 * &&(:(:;<S#f C ffn8M   =4 $$,,d.B.B.G.GHIV[$1'v '%(F"G ' 2 J'$ t''(  	""" " $J	" )"J.
.
. 49d3i .".
.
. *. 66>	.(f V   t$$%
   777 	7
 7 TM7 	7 TM7 4-7 6666)*7 &7t ,,44d6R6R6V6VWX  	
   
$  Y4 t$$%S#4( +(
+(+( +( 	+(
 +( 66>+( ) &+(^ t''(
 )
 tzz
 WW	W W 	W
 W W  Wt>#;L t$$%=  &= t%%&>  '>(< t--.\S  /S t../\T  0T2Ej t$$%=  &= t%%&>  '> ((00 &&..&&11''//''22	 \& &:<G~ t$$%=  &= t%%&>  '> &&..&&11''//''22	 \(( ((V t""#

f 

 

v 

  $

 t##$Pv PV P Pf PQW P  %P $$dll&6&678/0 '  9': &&(:(:;<&* I  =I
 t$$% & t~~ I !I
 $$dll&6&678"  9" t33;;< * =*. tww3U[[4/ 3  3,B
* .2B,,BLLB IOB #Y_	B
 3i#oB B B IOd*BJQ t%%--."$,,"$LL"$ ,,
"$ ,,%	"$
 $"$ "$ !&"$ "$ /"$J t''(,,LL ,, I	
 #Y 3i  I  )> 	889>9N9N&&:6 599##::BBC D, 599##55==>S ?S
 xx:?--:O:O66;
7 
uyy}}00	1	 
2	
 :?9N9N&&:6 599##55==>599##33;;<599##33::;4 < = ?4l 599##55<<=599##55CCD E >< 599##55==>599##55<<= > ?> 599##55<<=599##55CCD$ E >$L 599##77??@599##<<DDE	 F A	 =BMM<Q<QVV=9 599&&112 
 3
8 599&&??@@ A@, t&&' M (Mb(<X t''//0E 1EP t UJ   UJp t''(\K(  )K(\ t((001 2" t((001@ 2@ t1199:F ;F, t112\P  3P
	
6 	
S 	
 t''(UI+  )+\ t001\H  2H$ t''(UI'  )'T t001\(  2(
 t%%,,-* .* $$dll&6&678
  9
 ##++T-@-@-D-DEF46 

c 

  G

 		&&..		0F0F0J0JKL  M" 

!!4#5#5#<#<=>A ?AH ))11234H 44Hn ##T[[__56./q '  7'0 !!(()*' +' !!))4+=+=+E+EFG  !
 H
2   (()*  !! +!H ~B  B* 0012 3& 889: ; (()*8 +8  0012< 3< >>?@< A<"3 "3 "3 "@F 0012D!'$D 3D4 ++,-; .;( (()*< +< t""**+ & , &F t##$G  %G* t""**+
 	
`5 ,`5F t//7785 95
 ##T[[__56=$ =  7= ##T^^%;%;<=) >) !!					 Xy! "	 t  (() * t{{""#' $'& tzz!!" # 								!!  !!

 									**Z  

""DJJ$7$789 :
" &&(>(>?@ A &&(>(>?@ A tyy  !& "& 

!!4::#4#456 7 		  $))"2"234" 5" tzz!!" # t~~$$%F   & %%&'RV"  (" 0012RV; 3; &&'(KO ) &&(>(>(F(FGH" I" t  ''( )
 t!!))* + t##$	 %	 t""#6  $6 t++,! -! t&&' ((V txx 5 !5 txx~~J J6;h #-YYY 	Y 		Y
 	Y 	Y 	Y 	Y 	Y 	Y 	Y Y Y Y Y  !Y" #Y$ %Y& 'Y( )Y* +Y, -Yx;4|383838 38 		38
 	38 	38 	38 	38 	38 	38 	38 	38 38 38 38  !38" #38$ %38& '38lI2X t44<<=( >(V t++334 # 5#L t))112Q 3Qh t++,UI _  -_D t445\\  6\~%
V %
6 %
Pt  v 3 $ t,,445# 6#$ t##$8  %8" t,,-\;'! ( .!, 		!!"#. $. t&&' ) ()X 		!!499==12   3( 

""DJJNN34   5( t""**+. ,. t!!))*. +.
 C d  /
  t{{""#' $'6
 
%R4 t''(& )&
 t  ! "
 !!	 & & ""	 88@@AB
 #<<	< < 	<
 < < 4<< C<~ 88BBCD #	  }	
 } }    4< E4S#X& 889: #((	( ( }	(
 ( ( ( ( 4<( ;(V EEFG
  $#''	' ' }	'
 ' ' ' 4<' H'T 99( """ 
" 	"
 
" " " " " " " " " " 4<"
". 88 #	  	
  } 4<
: AA  $!"!"!" 
!" 	!"
 
!" !" !" !" }!" 4<!"
!"H ??@A
  $"&3&3&	3& 3& }	3&
 3& 3& 4-3& 4<3& 66>3& B3&l <<=> ))))	)) )) }	))
 )) )) 4<)) ?))X ==" 4-4-4- 
4- 	4-
 }4- 
4- 4- 4- 4- 4- $Z4- 4- 4<4-
4-n 99* !""" 
" 	"
 
" " " " " " " " " " "  4<!"
"0 %%--  #'$(#"&HH	H H }	H
 }H H H H H H 4<H DjH TzH }H 4-H
HV --7789 #'$(#"&%''	' ' }	'
 }' ' ' ' ' ' }' }' }' 4<' Dj'  Tz!'" }#'$ 4-%' :'T &&( #'$(#,,, 
, 	,
 
, , , , , , , , , , 4<,  Dj!," Tz#,
,4 ))   %%)""/S/S	/S /S 4-	/S
 4-/S 4-/S */S */S /S /S /S 4</S d]/S tm/S t/S
/Sd *** !%"'%474747 
47 	47
 4-47 4-47 4-47 ,,47 ,,47 47 47 47 47 47 47  4<!47" $J#47$  %47
47x !%(,$( zY
,,zY
,,zY \\zY \\	zY
 ,,
zY ,,%zY {{T!zY zYz ''() !%(,$( 
,,
,, \\ \\	
 ,,
 ,,% {{T!  *, !%$(*.*. dC
,,dC
,,dC %,,dC %	dC
 %,,dC %dC ,,
dC {{T!dC K 4'dC K 4'dC dCN ""**+, !%'+)- 
,,
,, %,, %	
 K  %,, % K  ,,
 ++$ 3i$&  -: ##'')<)<)D)DEF&  G&
 t##''( )
   (($*:*:*>*>?@	 	  A	,* 	$$d&D&D&L&LM

 	$$d&D&D&L&LM. ((00//77 "!#,- u||+, dl	
 dl: 	$$d&D&D&L&LM

 									*$N t))112
  3 t&&'4/ (4/n t&&') ()X t$$,,-$% .$%N


 ##T[[%8%89:4 ;4 t!!))* + tyy  !
Q "
Q t,,-LP  ." %%t}}';';<=Xy!K " >K  #("9"9 Q t77??@	* A	* t##++,	0 -	0 t!!))* +@ t--556F 7FD %%t~~'@'@AB27u   C %%t~~'@'@AB
 



 	

 
 C
 

|E  E4 	!!))4+F+F+N+NO & 55==>?  @8 t>>FFG H$ '')<)<=>"  ?" uyy~~(() *: uyy~~  !6 "6( t  !
 
	-K  "-K` t,,- . t++, '
 -'
T t112  3, t>>?  @. tyy8=e 8  8  t~~6c 6 6F 6  6 t''(I& I )I tyy"v "& "  " t||6& 6V 6  6'^ (,$( VKVKVK \\D VK \\D 	VK
 4-VK 4-VK ,,%VK {{T!VK VKr t  $( 4- 4-	
 {{T!   !& ''() !% $(,$( <<<< \\ \\	
 ,,
 ,,
 ,,% {{T!  *6 t!!(() *  t}}v C       t##$3  %3l t~~ $888 8 	8
 8 8  8. t33;;<
 	**&\* c* 	* =*, t00889 
-" :-"` d22 3 d44 5 d44 5 d== > d== > d== > d== > dDD E dDD E tBB C tBB C tBB C tBB C tJJ K tJJ K tJJ K tJJ K t@@ A tAA B tAA B tAA B tyy  $ t||./    t||./   , 	tyy)!$,,/!$,,/
    ENP r:   