
    i}                   >   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z) d d	l*m+Z+ d dl,Z,d d
l,m-Z-m.Z.m/Z/ d dl0m1c m2c m3Z4 d dl5m6c m7Z8 d dl9Z:d dl;Z:d dl<m7c m=Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF d dlGmHZH d dlImJZJ d dlKmLZLmMZMmNZNmOZOmPZP d dlQmRZRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZ d dl[m\Z\ d dl]m^Z^ d dl_m`Z` d dlambZbmcZcmdZdmeZe d dlfmgZg ddlhmiZimjZj ddlkmlZlmmZmmnZnmoZompZp ddljmqZqmrZrmsZsmtZtmuZu ddlvmwZw ddlxmyZymzZzm{Z{m|Z| ddl}m~Z~ ddlmZmZ dd l7mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ dd!lmZmZmZ er+d d"lmZ d d#lQmZ d d$l[mZ dd%lmZ dd&lmZ dd'lmZ dd(l7mZ neZd)ed*<   	 d dlZejT                  Zd+Z e'd-      Z e!d.      Z e!d/      Z e!d0      Ze"ee-f   Zd)ed1<   e"eee-f   Zd)ed2<   e"e:jl                  jn                  e:jl                  jp                  f   Zd)ed3<    ejt                  e      Z ej*                  e	jz                  d45      Ze:j2                  j|                  Z	 e"ed6eed6f   d7d8eee"eeed6f   d6d7d8f         f   Zd)ed9<   d-d:Z ej                  d+;       G d< d=             ZĐd.d>ZŐd/d?ZƐd0d@Zǐd0dAZ	 	 	 	 	 	 d1dBZg dCZg dDZ	 d2	 	 	 	 	 d3dEZ̐d4dFZ	 d2	 	 	 	 	 d3dGZed5d6dH       Zed5d7dI       Z	 d5	 	 	 	 	 d8dJZ	 	 	 	 d9dKZ	 	 	 	 d:dLZѐd;dMZҐd;dNZӐd<dOZ	 	 	 	 	 	 	 	 d=dPZ	 	 	 	 	 	 d>dQZ֐d?dRZאd@dSZdT Z G dU d8      Z ed,;       G dV dW             Ze G dX dYeګ             ZܐdAdZZe G d[ d\eܫ             Ze G d] d^eޫ             Z ed_       ed`       eda       edb       edc       edc       edd      deZdfedg<   	 d5	 	 	 	 	 	 	 dBdhZe G di djeܫ             Zd e.d       f	 	 	 	 	 	 	 dCdkZeee-   ee-   gef   Zd)edl<    G dm dne      Z G do dpe      Z G dq dre      Ze G ds dteܫ             Ze G du dve             Ze G dw dxeܫ             ZdDdyZdDdzZ	 	 	 	 	 dE	 	 	 	 	 	 	 	 	 	 	 	 	 dFd{Z	 	 	 	 	 	 dGd|ZdHd}Ze G d~ deګ             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Z G d de      Ze G d deګ             Ze G d de             Ze G d de             Z	 	 	 	 	 	 dIdZdJdZ G d d      Ze G d de             Z G d de      Z  G d de      Z G d de      Z G d de      Z G d de       Ze G d de             Z G d de      Z ed,;       G d deem             Z ed,;       G d dee۫             Z G d de      Z	 G d de	      Z
 G d de	      Ze G d deګ             Ze G d deګ             Z ed,;       G d de             Z G d de      Z G d de      Ze"eeeeee"eeeef      f   Z G d d      Z G d de      Z G d dÐe      Z G dĄ dŐe      Z G dƄ dǐe      Z G dȄ dɐe      Z G dʄ dːe      Z	 	 	 	 dKd̄Z ed,;       G d̈́ dΐe             Z G dτ dАe      Z G dф dҐe      Z ed,;       G dӄ dԐe             Z ed,;       G dՄ d֐e             Z  G dׄ dؐe       Z! G dل dڐe      Z" G dۄ dܐe      Z# G d݄ dސe      Z$ G d߄ de$      Z% G d de$      Z& G d de      Z' G d de      Z( G d de      Z) G d de      Z* G d de      Z+ G d de+      Z, G d de"      Z- G d de      Z. G d de      Z/ G d de       Z0 G d de      Z1 G d de      Z2 G d de      Z3 G d de      Z4 ed,;       G d d              Z5 G d de"      Z6 ed,;       G d de6             Z7 G d de6      Z8e G d de             Z9 G d	 d
e      Z:ej                   G d deګ             Z; G d d6e;      Z< G d de;      Z= ed,;       G d deګ             Z>dLdZ? ed,;       G d de             Z@ ed,;       G d de             ZA	 	 	 	 dMdZB ed,;       G d de             ZC G d de6      ZD G d deګ      ZEe G d deE             ZFe G d  d!eE             ZG G d" d#e6      ZH G d$ d%eH      ZI G d& d'eH      ZJ G d( d)eH      ZKdNd*ZLdNd+ZMdOd,ZNy# e$ r dZd,ZY w xY w(P      )annotationsN)Callable	GeneratorIterableIteratorSequence)AbstractContextManagernullcontext)Enum)partial)AnycastClassVarLiteralOptionaloverloadSupportsFloatSupportsIntTYPE_CHECKING	TypeAliasTypeVarUnion)assert_neverNeveroverride	ParamSpecSelfTypeIs)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metricsget_free_symbols)is_opaque_type)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)	&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes)Node
OrderedSet)_disable_current_modes)CleanDivFloorDivModModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reorderingKernel)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfcache_on_self_and_argsceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)SympyBoolean)Argument)CUTLASSTemplate)PythonWrapperCodegen)GraphLowering)IndentedBufferr   rr   TF_P_T_U_V_IntLike_NumLike_OpOverloadsz  prefix	TensorBoxr"   IRNode_NodeOrNodesc                .    t        | t        t        f      S N)
isinstanceintr!   xs    c/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/torch/_inductor/ir.py
_is_staticr      s    a#w((    )frozenc                  J    e Zd ZU ded<   ded<   ded<   ded<   d	ed
<   ded<   y)GraphPartitionSignatureOrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraph	list[str]constant_namesN__name__
__module____qualname____annotations__ r   r   r   r      s/     ,+ GF (' r   r   c                "    dfd |        y )Nc                   | y t        | t        t        f      r| D ]
  } |        y t        | t              r| j	                         D ]
  } |        y t        | t
        t        t        t        t        j                  j                  j                  t        t        t        t         f	      sJ dt#        |        d       y )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])r   listtupledictvalues
ExpandViewDynamicScalarAssertScalarr   sympylogicboolalgBooleanr    r   EffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     r   r   z%validate_ir.<locals>._check_tensorbox   s     =e}- ' &'t$ ' &' ! KK''//#)
  e%jk r   )r   Optional[_NodeOrNodes]returnNoner   )node_or_nodesr   s    @r   validate_irr      s    < ]#r   c                T     t         t              sJ t                      d fd}|S )Nc                 0     t        t              | i |S r   )getattrrl   )argskwargsnames     r   fnzops_wrapper.<locals>.fn  s    !wsD!42622r   )r   objectr   r   r   rm   )r   strr   )r   r   s   ` r   ops_wrapperr   
  s(    dC ,$t*, 3 Ir   c           
     b    t        t        | t        t        |                         dfd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r   lenrange)indexi	inv_orders     r   reindexz inverse_reorder.<locals>.reindex  s?    5zS^+++-23u:->?il#???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @r   inverse_reorderr     s*    Sc%j 123I@ Nr   c                     d fd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r   r   )r   r   r   s     r   r   zsame_reorder.<locals>.reindex  s>    5zSZ''').s5z):;AeAh;;;r   r   r   )r   r   s   ` r   same_reorderr     s    < Nr   c                     d fd}|S )Nc                       |             S r   r   )r   reindex1reindex2s    r   r   z fuse_reindexing.<locals>.reindex)  s    ((r   )r   r   r   zSequence[_V]r   )r   r   r   s   `` r   fuse_reindexingr   %  s    ) Nr   )   r      rA   )   r   r   r   rA   c                `    |t        d | D              rt        |       }|S t        ||       }|S )z1
    Convert strides to fill order (argsort)
    c              3  \   K   | ]$  }t        |t        t        j                  f       & y wr   r   r   r   r!   .0ss     r   	<genexpr>z!get_fill_order.<locals>.<genexpr>9  s      Q
1sEMM.B CQ   *,)allrV   rW   )seq	shape_env
sorted_idxs      r   get_fill_orderr   3  s<     CQSQQ$+CL
  !C0
r   c                    t        |       D ci c]  \  }}||
 }}}t        t        |             D cg c]  }||   	 }}|S c c}}w c c}w )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    )	enumerater   r   )r   idxposlookupr   
fill_orders         r   stride_order2fill_orderr   A  sR     (1'7883c3h8F8%*3u:%67&)7J7 97s
   AAc                    t        | |      }t        t        |             D cg c]  }d }}t        |      D ]
  \  }}|||<    |S c c}w )z)
    Convert strides to stride order
    r   )r   r   r   r   )r   r   r   _outr   elems          r   get_stride_orderr   M  sV     !/sI >JCHo
&1
&C
&Z( 4D	J 's   	Ac                     y r   r   r   guard_shapes     r   ir_node_to_tensorr   Z  s    BEr   c                     y r   r   r   s     r   r   r   ^  s    LOr   c                   | y |s%t         j                  j                  j                  }nt        }| j                         D cg c]
  } ||       }}t        |       r.| j                         j                  D cg c]
  } ||       }}nt        j                  |      }| j                         }| j                         }t        |      }t        |      }t         j                  j                  j                  j                         5  t!        j"                  ||||      j%                         }d d d        |S c c}w c c}w # 1 sw Y   S xY w)N)sizestridedtypedevice)rn   graphsizevars	size_hintr#   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicer\   r   suppress_guardstorchempty_stridedzero_)	r   r   shape_fnr   r   r   r   r   ts	            r   r   r   b  s    	y 77##--!".AHQK.D.Q'(||~'<'<=!(1+==2248KKME\\^F"4(D$V,F	
			#	#	3	3	5 fE&

%' 	
 H / > Hs   D<;E
(EEc                0    t        | t              r| sd gS | S r   )r   r   values    r   may_convert_to_optionalr    s     %u vLr   c                    t        | t              s| | S t        | t        j                        r| j                  S t        | t
        t        f      rt        | j                               S t        d|  dt	        |       j                   d       y )Nzget_device_type(: ))r   r   r  r   r   r   
OutputSpecget_device_typer  r   r   r   s    r   r  r    sq     !SQY	Au||	$vv	A
+	,q||~..#A3ba)9)9(:!<=r   c                    t        |       }|dv rt        t        | d      dk(  ryy|t        |      x}yddlm} t        |t              sJ t        |             t        ||      S )N)cpucuda_backendtritonTFrA   )TritonScheduling)	r  r   rB   rF   codegen.tritonr  r   r   
issubclass)r   r   device_schedulingr  s       r   	is_tritonr    sy    QF  6fXX./8;!:6!BBK0'.G5F0GG.')9::r   c                    t        |       dk(  S )Nr  )r  r   s    r   is_cpur!    s    1&&r   c                @   t        | t              rB| j                         2t        | j	                               st        | j                               ryt        j                  fd| j	                         d d D         }t        j                  t        j                  | j	                         d   d      t        j                  | j                         d   d            }t        j                  ||      }t        j                  j                  j                  |      S )NFc              3  ^   K   | ]$  }t        j                  t        |      d        & yw)r   N)r   Eqr>   )r   r   	alignments     r   r   z-is_aligned_realized_tensor.<locals>.<genexpr>  s#     	FQ%((3q)$a
(	Fs   *-rA   )r   r   maybe_get_strider2   
get_strider   r   AndOrr$  Lern   r   r   guard_or_false)r   r%  aligned_stridesaligned_last_dim
is_aligneds    `   r   is_aligned_realized_tensorr0    s    q&!' 0 .ii	F!,,."2E	FO xx#Q'!**,r2BA)F ?,<=J 77**:66r   c                   t        |      t        |       k(  rt        |       t        |      k(  sJ t        || |      D ]  \  }}}t        j                  j                  j                  |d      r2t        j                  j                  j                  ||      r]t        j                  j                  j                  |      t        j                  j                  j                  |      k7  s y y)zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    rA   FT)r   r   rn   r   r   statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         r   significant_strides_equalr;    s     u:X&3x=CM+III5(H5 R7700a8ww77
gg,,R0AGG4D4D4R4RSU4VV r   c                Z   t        |       s| S t        d t        || j                               D              r| S t	        || j                         | j                               s| S t        |       \  }}g |j                  }t        | j                               D ]8  \  }}t        j                  j                  j                  |d      s1||   ||<   : t        |j                  |j                  |j                   ||j"                  |j$                        }t'        t)        ||            S )a  
    Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
    dimensions - size 0 or 1 - will be updated.

    If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
    c              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wr   rn   r   r   r3  r   r9  r:  s      r   r   z2try_match_insignificant_strides.<locals>.<genexpr>  s3      B 	
00R8   79rA   datalayout)r  r   r   r(  r;  r   as_storage_and_layoutr   r   rn   r   r   r2  FixedLayoutr   r   r   offset	is_pinnedr   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           r   try_match_insignificant_stridesrO    s    !(
 '6#4#4#67  $Wf.?.?.A6??CTU/7GZ%:$$%J&//+, '17700A6#AJJqM' J _'*EFFr   c                    | j                   j                  d      d   }t        |j                        D cg c]  \  }}|	 c}}|j                  d<   ddlm}  ||        y c c}}w )Noutputopr   user_visible_output_idxs)record_original_output_strides)r   
find_nodesr   r   metatorch._inductor.compile_fxrU  )gmoutput_noder   r   rU  s        r   gm_original_output_stridesr[    sa    ((%%%215K#K$4$454Q4K/0 J"2&4s   A$c                    t               }| D ]>  }|t        |j                         d      z  }|t        |j                         d      z  }@ t	        |      S )NFunbacked_only)r:   r(   r   r(  r   )inputssym_varsinps      r   get_symbolic_inputsrb  	  sW    !+H L$S\\^5II$S^^%5UKKL >r   c                    t        | t              r| j                  } t        | t              r| j	                         } t        | t
              r| j                  } t        | t              r| j                         S d S r   )r   r   rB  BaseViewunwrap_view
StorageBoxBufferget_namer   s    r   try_get_nameri    sW    !YFF!XMMO!Z FF%a01::<:d:r   c                  D   e Zd ZU dZ e       Zded<    ej                  d      Z	ded<    ej                  d      Z
ded	<    ej                  d      Zd
ed<    ej                  d      Zded<   eej                  dId              ZedJd       ZdKdZdLdZdMdZdNdZdOdZdPdZdMdZdQdRdZ	 dS	 	 	 	 	 	 	 dTdZdUdZdVdZdWdZdXdZdYdZdZdZ d[dZ!d\d Z"d]d!Z#e$d^d"       Z%d_d#Z&d[d$Z'd`d%Z(dadbd'Z)dcd(Z*ddd)Z+d[d*Z,ded+Z-dfd,Z.dgd-Z/d]d.Z0dhd/Z1d`d0Z2d[d1Z3dadid2Z4djd3Z5dLd4Z6dkd5Z7dLd6Z8	 dl	 	 	 	 	 dmd7Z9dnd8Z:dod9Z;	 dl	 	 	 	 	 dpd:Z<dqd;Z=drd<Z>dsd=Z?dtd>Z@	 dl	 	 	 dud?ZAd`d@ZBd\dAZCd[dBZDd[dCZEdvdDZFdwdEZGdhdFZHdwdGZIeJr
e$dUdH       ZKy&y&)xr   zBase class for all intermediate representation (IR) nodes in TorchInductor.

    Note:
        This is an abstract base class. Most methods raise NotImplementedError
        and must be overridden by concrete subclasses.
    zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodedict[str, Any]r   c              #     K   t         j                  }|| z  t         _        	 d  |t         _        y # |t         _        w xY wwr   )r   rk  )rn  olds     r   current_originszIRNode.current_origins.  s7      %%"%-	*&)F#cF#s   A2 A?Ac                L    t        | t        t        t        t        t
        f      S r   )r   ComputedBufferInputsKernelInputBufferrH  TemplateBuffer)r   s    r   is_realized_nodezIRNode.is_realized_node8  s&    	
 		
r   c                2    t         j                  | ||       y r   )r   __setattr__)selfattrr  s      r   _post_init_setattrzIRNode._post_init_setattrE  s     	4u-r   c                   t        | j                        }| j                  d|       | j                  dt        j                  rt        j                         nd        | j                  dd        | j                  di        y )Nrn  rp  rr  r   )r:   rk  r  rB   debug_ir_tracebackrp  format_stack)r  rn  s     r   __post_init__zIRNode.__post_init__K  sk    T223	73V5N5N//1TX	
 	t4r2r   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr   r   r   deps     r   r   z(IRNode.get_read_names.<locals>.<genexpr>V       ?s#((?   r:   	get_readsr  s    r   get_read_nameszIRNode.get_read_namesU      ?dnn.>???r   c                    | j                   S r   )rp  r  s    r   get_tracebackzIRNode.get_tracebackX  s    ~~r   c                    | j                   S r   rr  r  s    r   get_origin_nodezIRNode.get_origin_node[      r   c                     y r   r   r  s    r   get_defining_opzIRNode.get_defining_op^      r   c                t   t               }| j                  }t        | t              r(| j	                         }| j
                  rt        |g      }|D ]  }t        |d      r(|j                  r|j                  |j                         7t        j                  j                  j                  j                  di       j                  |j                  g       }t        |t              s|D ]J  }t        j                  j                  j                   j                  |d       }|s:|j                  |       L  |S )Nstack_trace	postToPre)r:   rn  r   ExternKernelr  rr  hasattrr  addr  	_inductordebug _inductor_post_to_pre_grad_nodesgetr   r   #_inductor_pre_grad_node_stack_trace)r  stack_tracesrn  rr  r   pre_grad_nodes	node_namer  s           r   get_stack_traceszIRNode.get_stack_tracesa  s    )3,,dL)..0K$k]3 	6Dt]+0@0@  !1!12 OO))JJNN# c$))R(  ".$7!/ 6I--QQUU%t  
 #$((56	6, r   c                   dt        | dd       }|rt        |      dkD  r|d d  d}| j                         s|gS g }| j                         D ]8  }|j                  d       ||j	                  d      z  }|j                  d	       : |g|z   S )
Nzorigins=rn   @   =   z...zstack_traces = {
})r   r   r  appendsplit)r  shortenrn  stack_trace_strr  s        r   common_reprzIRNode.common_repr  s    WT9b9:;s7|b( "c*G$$&9002 	(K""#56{0066O""3'	( y?**r   c                $   t        |      t        | j                  |            z   }t        t        t        |            }|r5t	        dj                  |            }t        |       j                   d| dS t        |       j                   d| dS )Nz,
z(
z
)(r  )r   r  mapr   indentjoinr   r   )r  linesr  	multiline	new_liness        r   
str_helperzIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44r   c                    | j                   S r   r   r  s    r   r  zIRNode.get_dtype      zzr   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  NotImplementedErrorr  s    r   maybe_get_dtypezIRNode.maybe_get_dtype  s&    	>>##" 		    	c                2    t        dt        |        d      )Nz#get_layout() is not implemented by !r  r   r  s    r   r  zIRNode.get_layout  s    !$GT
|ST"UVVr   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  r  r  s    r   maybe_get_layoutzIRNode.maybe_get_layout  &    	??$$" 		r  c                "    | j                         S r   )r  r  s    r   get_output_speczIRNode.get_output_spec  s      r   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  r  r  s    r   maybe_get_output_speczIRNode.maybe_get_output_spec  s(    	''))" 		r  c                >    t        | j                         t              S )z4True for single tensor output (excludes MultiOutput))r   r  Layoutr  s    r   has_tensor_outputzIRNode.has_tensor_output  s    $446??r   c                2    t        dt        |        d      )Nz!get_size() is not implemented by r  r  r  s    r   r   zIRNode.get_size  s    !$Ed4j\QR"STTr   c                B    	 | j                         S # t        $ r Y y w xY wr   )r   r  r  s    r   maybe_get_sizezIRNode.maybe_get_size  %    	==?"" 		r  c                "    | j                         S r   r   r  s    r   r7  zIRNode.shape  s    }}r   c                4    t        | j                               S r   )ri   r   r  s    r   	get_numelzIRNode.get_numel  s    T]]_--r   c                    t         j                  j                  j                  t	        j
                  | j                         d            S Nr   rn   r   r   statically_known_truer   r$  r  r  s    r   is_zero_elementszIRNode.is_zero_elements  0    ww55ehht~~?OQR6STTr   c                0    t        dt        |              )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on r  r  s    r   realizezIRNode.realize  s      "ODJ<"@AAr   Nc                0    t        dt        |              )Nzcodegen_reference NYI on r  r  writers     r   codegen_referencezIRNode.codegen_reference  s    !$=d4j\"JKKr   c                     y r   r   r  s    r   r  zIRNode.get_device  r  r   c                .    | j                         }|J |S r   )r  r  r   s     r   get_device_or_errorzIRNode.get_device_or_error  s    "!!!r   c                     yNFr   r  s    r   has_exceeded_max_readszIRNode.has_exceeded_max_reads      r   c                >    t        t        |       j                        r   r  r   r   r  s    r   make_loaderzIRNode.make_loader      !$t*"5"566r   c                >    t        t        |       j                        r   r  r  s    r   make_indexerzIRNode.make_indexer  r  r   c                >    t        t        |       j                        r   r  r  s    r   r(  zIRNode.get_stride  r  r   c                B    	 | j                         S # t        $ r Y y w xY wr   )r(  r  r  s    r   r'  zIRNode.maybe_get_stride  r  r  c                >    t        t        |       j                        r   r  r  s    r   rh  zIRNode.get_name  r  r   c                B    	 | j                         S # t        $ r Y y w xY wr   )rh  r  r  s    r   maybe_get_namezIRNode.maybe_get_name  r  r  c                v    	 | j                         t        j                  j                  v S # t        $ r Y yw xY wr  )rh  rn   r   graph_inputsr  r  s    r   is_input_bufferzIRNode.is_input_buffer  s4    	==?agg&:&:::" 		s   ), 	88c                     yr  r   r  	thresholds     r   has_large_inner_fnzIRNode.has_large_inner_fn  r  r   c                     y r   r   r  userss     r   
mark_reusezIRNode.mark_reuse      r   c                     y r   r   r  s    r   realize_hintzIRNode.realize_hint  r  r   c                >    t        t        |       j                        r   r  r  s    r   re  zIRNode.unwrap_view  r  r   c                >    t        t        |       j                        r   r  r  s    r   freeze_layoutzIRNode.freeze_layout  r  r   c                >    t        t        |       j                        r   r  r  r   allow_paddings      r   freeze_layout_with_stride_orderz&IRNode.freeze_layout_with_stride_order       "$t*"5"566r   c                >    t        t        |       j                        r   r  r  r   s     r   freeze_layout_with_fill_orderz$IRNode.freeze_layout_with_fill_order!  r  r   c                >    t        t        |       j                        r   r  r  r   s     r   freeze_layout_with_same_orderz$IRNode.freeze_layout_with_same_order$  r  r   c                >    t        t        |       j                        r   r  r  exact_stridesr  s      r    freeze_layout_with_exact_stridesz'IRNode.freeze_layout_with_exact_strides'  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_read_writeszIRNode.get_read_writes,  r  r   c                6    | j                         j                  S r   r  readsr  s    r   r  zIRNode.get_reads/      ##%+++r   c                4    t        | j                               S r   )r   r  r  s    r   	num_readszIRNode.num_reads2  s    4>>#$$r   c                >    t        t        |       j                        r   r  r  s    r   get_storage_numelzIRNode.get_storage_numel5  r  r   c                >    t        t        |       j                        r   r  r  r^  s     r   get_free_symbol_useszIRNode.get_free_symbol_uses8  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_reduction_typezIRNode.get_reduction_type=  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_reduction_sizezIRNode.get_reduction_size@  r  r   c                     yr  r   r  s    r   	is_externzIRNode.is_externC  r  r   c                     yr  r   r  s    r   is_no_opzIRNode.is_no_opF  r  r   c                >    t        t        |       j                        r   r  r  s     r   constant_to_devicezIRNode.constant_to_deviceI  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_mutation_nameszIRNode.get_mutation_namesL  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_operation_namezIRNode.get_operation_nameO  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_inputs_that_alias_outputz#IRNode.get_inputs_that_alias_outputR  r  r   c                     y r   r   r  s    r   r   zIRNode.dtypeW  s    (+r   )rn  zOrderedSet[Node]r   zGenerator[None, None, None]r   r   r   r   )r  r   r  r   r   r   r   r   r   OrderedSet[str])r   ro  r   rq  r   zOptional[Operation]T)r  r   r   Sequence[str])TT)r  zSequence[object]r  r   r  r   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   r  )r   zOptional[Layout]r   r  )r   zOptional[OutputSpec]r   r   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r    r   Optional[str]r   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r  Optional[int]r   r   r  r   r   r   r   r   Fr   Sequence[int]r  r   r   r   r   r\  r   r   r   rT  r   r   r  rT  r  r   r   r   r   dependencies.ReadWritesr   zOrderedSet[Dep]r   r   r   rz   r^  r   r   r   r   rN  r   r   r   r>  )Lr   r   r   __doc__r:   rk  r   dataclassesfieldrn  rp  rr  r   staticmethod
contextlibcontextmanagerrv  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  propertyr7  r  r  r  r  r  r  r  r  r  r(  r'  rh  r  r  r  r  r  re  r	  r  r  r  r  r  r  r   r"  r%  r'  r)  r+  r-  r/  r1  r3  r5  r   r   r   r   r   r   r     sf    3=,/>  1{00e<G_<%6[%6%6E%BI"B+<;+<+<%+HK(H"3+"3"3"?K?*  * 

 

.3@ B+  PT
5%
504
5HL
5	
5W!@U  .UB$L
777777 ;@7"7377	7
77 HM7/7@D7	7
7,%7 %*7!7	!7
777777 	+ 
+ r   c                      e Zd ZddZddZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZ	 d	 	 	 ddZddZy) 	Operationc                    d | _         y r   operation_namer  s    r   r  zOperation.__post_init__]  s
    -1r   c                    t         r   r  r  s    r   r  zOperation.get_device`      !!r   c                6    t        | d      sJ | j                  S Nrr  )r  rr  r  s    r   r  zOperation.get_origin_nodec  s    t]+++r   c                6    t        | d      sJ | j                  S )Nrn  )r  rn  r  s    r   get_originszOperation.get_originsg  s    tY'''||r   c                6    | j                   J | j                   S r   rr  r  s    r   r3  zOperation.get_operation_namek  s     ""..."""r   c                     yr  r   r  s    r   r+  zOperation.is_externo  r  r   c                     yr  r   r  s    r   r-  zOperation.is_no_opr  r  r   c                    t         r   ru  r  s    r   r  zOperation.get_read_writesu  rv  r   c                &    || j                         v S r   )r  r  r   s     r   
is_user_ofzOperation.is_user_ofx  s    t**,,,r   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr   r  r  s     r   r   z+Operation.get_read_names.<locals>.<genexpr>|  r  r  r  r  s    r   r  zOperation.get_read_names{  r  r   c                6    | j                         j                  S r   r  r  s    r   r  zOperation.get_reads~  r  r   c                    t         r   ru  r  s    r   get_outputszOperation.get_outputs  rv  r   c                    t               S r   r9   r  s    r   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs  
    |r   c                    t               S )a  
        When unbacked_only=True:
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.

        When unbacked_only=False:
        Similar to `unbacked_only=True` but including all free symbols
        instead of only free unbacked symbols.
        r9   r$  s     r   r%  zOperation.get_free_symbol_uses  s    , |r   c                     y)z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   r   r  s    r   get_workspace_sizezOperation.get_workspace_size  s    
 r   Nr8  rK  r;  )r   rm  rU  rC  r`  )r   r   r   r   r9  rb  r   list[Buffer]r   r   rZ  re  rc  )r   r   r   r  r  r  rz  r3  r+  r-  r  r  r  r  r  r  r%  r  r   r   r   rp  rp  [  sc    2" #"-@," %*!	!0r   rp  c                  >   e Zd ZU ded<   ded<   ded<   ded<    ed       	 d	 	 	 dd	       Zd d
Zd!dZeZd"dZ	d#dZ
d$dZd$dZed%d       Zeej"                  fd&d       Zed'd       Zd(dZed!d       Zd)d*dZdd+dZd,dZd-dZd.dZd$dZd/dZd0dZy)1LoopsrN  r   r@  r   Callable[..., Any]inner_fnrT  rangesc                     t               j                  g fd| j                  D        | j                         S )Nc              3  6   K   | ]  }t        |        y wr   r'   r   er^  s     r   r   z-Loops.get_free_symbol_uses.<locals>.<genexpr>  s     FQq-0F   )r:   unionr  inner_fn_free_symbolsr$  s    `r   r%  zLoops.get_free_symbol_uses  s@     "z|!! 
F$++F
&&}5
 	
r   c                   | j                  d| j                  j                   dt        | j                        | j                         g|D cg c]  }| dt        | |        c}z   d| j                  gz         S c c}w )N'=origin_node=)r  r   r   r   r   inner_fn_strr   rr  )r  namesr   s      r   _to_strzLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA4$qt,-.AB d..1234
 	
 Bs   A?
c                $    | j                  d      S Nr  r  r  s    r   __str__zLoops.__str__      ||K((r   c                    | j                   S r   r   r  s    r   r  zLoops.get_device      {{r   c                    | j                   S r   r  r  s    r   r  zLoops.get_origin_node  r  r   c                    | j                   S r   r  r  s    r   r   zLoops.get_size  r  r   c                    | j                   S r   r  r  s    r   get_pointwise_sizezLoops.get_pointwise_size  r  r   c                    |j                  dd       }|j                  dd       } | |i |}|j                  d|       |j                  d|xs |j                         t        j	                  |      S )Nrr  rp  )popr  rp  r   create)clsr   r   rr  tbrs         r   r  zLoops.create  sm    jj5ZZT*   	
]K8	["*;<""r   c                    t        |       D cg c]0  \  }}|dk(  rt        j                  j                  nt	        ||      2 c}}S c c}}w NrA   )r   r   SZerorh   )r  r~   nr   s       r   _indexzLoops._index  sH     "&)
1 FEGGLL(Fvq(QQ
 	
 
s   5Ac                `   t        t        j                               }t        j                  |      5  t	        j
                  t        dd      5   | j                  | j                           |j                         cd d d        cd d d        S # 1 sw Y   nxY wd d d        y # 1 sw Y   y xY wNallow_indexingT)
rO   rn   MockHandlerset_ops_handlerr   r   r  r  inner_fn_argsgetvalue)r  	opcounters     r   inner_fn_opcountzLoops.inner_fn_opcount  s     1	i(	(LL)94@	( DMM4--/0%%'	( 	( 	( 	( 	( 	( 	(s#   B$-B<	B$B	B$$B-c                :    | j                  | j                        fS r   )r  r  r  s    r   r  zLoops.inner_fn_args  s    DKK(**r   c                r    t        j                  j                  | j                  g| j	                          S r   )rn   KernelFormatterHandlerir_to_stringr  r  r  s    r   r  zLoops.inner_fn_str  s3    ''44MM
 ..0
 	
r   Nc                x    |d}t        |t        j                        }| j                         j                  |kD  S r  )maxrB   realize_opcount_thresholdr  num_opsr  s     r   r  zLoops.has_large_inner_fn  s9    I	6#C#CD	$$&..::r   c                h    | j                  | j                        }t        | j                  ||      S Nr]  )r  r  rJ   r  )r  r^  r   s      r   r  zLoops.inner_fn_free_symbols  s'    DKK(#DMM5VVr   c                |   t        j                  t        dd      5  | j                         rJt	        | j                         | j                         | j                               j                  cd d d        S t	        | j                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )	r   r   r  r'  rL   r  r   r)  r  r  s    r   r  zLoops.get_reads  s    \\.*:DA 	&&(*$$&MMO++- %	 	 +$$&MMO %	 	 	s   AB271B22B;c                H    t        | j                         j                        S r   )r:   r  read_buffersr  s    r   r  zLoops.get_read_names  s    $//1>>??r   c                H    t        | j                         j                        S r   )r   r  r  r  s    r   r   zLoops.num_reads  s    4((*7788r   c                2    t        dt        |        d      )Nz+get_reduction_size() is not implemented by r  r  r  s    r   r)  zLoops.get_reduction_size      !9$t*QG
 	
r   c                2    t        dt        |        d      )Nz+get_reduction_type() is not implemented by r  r  r  s    r   r'  zLoops.get_reduction_type  r  r   c                2    t        dt        |        d      )Nz+constant_to_device() is not implemented by r  r  r  s     r   r/  zLoops.constant_to_device!  r  r   rZ  re  )r  r>  r   r   rU  rK  r;  rD  )r   r   r   r   r   r   )r  rT  r~   r@   r   rE  )r   rP   r   zSequence[Sequence[_IntLike]]r   rV  r^  r   r   OrderedSet[Symbol]rb  r9  rc  rH  rf  )r   r   r   r   rY   r%  r  r  __repr__r  r  r   r  classmethodr  rk  r@   INDEXr  rX   r  r  r  r  r  r  r  r   r)  r'  r/  r   r   r   r  r    s      G$$)
!
	!
 %
	
) H  	# 	# :>** 
 
 ( (+ 
 

;W@9




r   r  c                   |j                   rt        j                  t        d      |      S t        j                  d|      S )Nnanr   )is_floating_pointrl   constantfloat)r   r   s     r   nop_loader_fnr  '  s1    ||E%L%00||Au%%r   c                  P    e Zd ZddZd	dZeZd
dZddZ	 	 	 	 	 	 	 	 ddZddZ	y)	Pointwisec                p    | j                         rt        t        | j                        S | j                  S Nr  )r  r   r  r   r  r  s    r   r  zPointwise.make_loader0  s)      "=

;;}}r   c                $    | j                  d      S r  r  r  s    r   r  zPointwise.__str__7  r  r   c                    g S r   r   r  s    r   r)  zPointwise.get_reduction_size<  s    	r   c                     y r   r   r  s    r   r'  zPointwise.get_reduction_type?  r  r   c                p    | j                         }t        j                  |xs d ||       ||            S Nunnamed)r  rl   storer  output_nameindexervarsloaders        r   store_outputzPointwise.store_outputB  s2     !!#yy1	74=&,OOr   c                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                        S FMove this to a given device. Requires that all reads are to constants.override_devicer   r   r  r  )r  r   r   ConstantBufferr  r   r  r  r   r  s      r   r/  zPointwise.constant_to_deviceK  sK    !!#Hn.?HP**;;	
 	
r   NrO  rU  )r   zSequence[sympy.Expr]rH  )r  rI  r  !Callable[[Sequence[Expr]], Never]r  rE  r   r   rf  )
r   r   r   r  r  r  r)  r'  r  r/  r   r   r   r  r  .  sR    ) HP"P 3P 	P
 
P	
r   r  c                  F    e Zd ZU ded<   dZded<   ddZ	 	 	 	 	 	 	 	 d	dZy)
ScatterrR  output_indexerNrR   scatter_modec                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                        S )r  r  )r   r   r  r  r  r  )	r  r   r   r  r  r   r  r  r  r  s      r   r/  zScatter.constant_to_device\  s]    !!#Hn.?HP**;;..**
 	
r   c                    | j                         }|d}t        j                  | || j                  |             ||      | j                        S )Nr  )mode)r  rl   r  r  r  r  s        r   r  zScatter.store_outputi  sT     !!##KyyD''-.4L""	
 	
r   rf  )r  rI  r  r  r  rE  r   r   )r   r   r   r   r  r/  r  r   r   r   r  r  W  sB    44"L)"

"
 3
 	

 

r   r  
logical_ormaximumminimummulr  bitwise_xor)anyr  minprodsumdotxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                      t         v r	t             S  dv r	 	 	 	 	 	 d fd}|S  dk(  r	 	 	 	 	 	 dd}|S t        d        )Nargmaxargminc                   | \  }}|\  }}dk(  rt        j                  ||      }nt        j                  ||      }t        j                  ||      }t	              rt        j
                  ||      }t        j
                  ||      }	t        j                  |t        j                  ||	            }t        j                  |t        j                  ||	            }rt        j                  ||      nt        j                  ||      }
t        j                  |t        j                  ||
            }t        j                  |||      t        j                  |||      fS )Nr  )	rl   ltgteqr,   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              r   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !GW GW)vvgw/vvgw/FF7G,Ee$&&'2&&'2~~dCFF7G,DEucoogw.OP ' w(VVGW- 
 >>$s(CDD		$1		$1 r   welford_combinec                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS r   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fn  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB r   zunknown reduction_type=)r  tuple[object, object]r  r4  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r6  r   r6  )r  r  )r&  r   r%  r'  r3  s   ```  r   get_reduction_combine_fnr7    s     --#N33	/	/	$	)>	&	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMr   c                  
    e Zd ZU ded<   ded<   ded<   ded<   dd	ZeZ ed       d d! fd
       Zd"dZd#dZ		 	 	 	 	 	 	 	 	 	 d$dZ
d%dZd&dZd d!dZd'dZe	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d)d       Ze	 	 	 	 	 	 	 	 	 	 d*d       Zeej(                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d+d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 	 	 d-d       Ze	 	 	 	 	 	 d.d       Ze	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d/d       Ze	 	 	 	 	 	 	 	 	 	 	 	 d0d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d1d       Ze	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d2d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3d       Z xZS )4	ReductionrT  reduction_rangesrQ   r&  r@  	src_dtyperU   reduction_hintc                $    | j                  d      S )N)r  r:  r&  r  r  s    r   r  zReduction.__str__  s    ||LMMr   c                z    t         |          t               j                  fd| j                  D         z  S )Nc              3  6   K   | ]  }t        |        y wr   r'   r  s     r   r   z1Reduction.get_free_symbol_uses.<locals>.<genexpr>  s     PQq-0Pr  )superr%  r:   r  r:  r  r^  	__class__s    `r   r%  zReduction.get_free_symbol_uses  s:    w+M:=OZ\=O=OP$:O:OP>
 
 	
r   c                    | j                   S r   )r:  r  s    r   r)  zReduction.get_reduction_size  s    $$$r   c                    | j                   S r   )r&  r  s    r   r'  zReduction.get_reduction_type      """r   c           	         t        j                  | j                  | j                  | j                  | j                  ||            }t        j                  |xs d ||      |       y r  )rl   	reductionr   r;  r&  r  store_reduction)r  r  r  r  reduction_varsr  s         r   rH  zReduction.store_reduction  sT     JJNNMM$/	
 	K49gdmUKr   c                X    t        | j                        t        | j                        z   S r   )r   r  r:  r  s    r   index_lengthzReduction.index_length  s!    4;;#d&;&;"<<<r   c                    | j                  | j                        }| j                  | j                  t        j                        }||fS r   )r  r  r:  r@   R0_INDEX)r  r   rindexs      r   r  zReduction.inner_fn_args  s8    DKK(T22DMMBvr   c                    | j                  | j                        }| j                  | j                  t        j                        }t        | j                  |||      S r  )r  r  r:  r@   rM  rJ   r  )r  r^  r   rN  s       r   r  zReduction.inner_fn_free_symbols  sH    DKK(T22DMMB#MM5&
 	
r   c           
     
   | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                  | j                  t        j                        S )r  r  r   r   r  r  r:  r&  r;  r<  )r  r   r   r  r9  r   r  r:  r&  r;  rU   DEFAULTr  s      r   r/  zReduction.constant_to_device  sm    !!#Hn.?HP**;;!22..nn(00	
 		
r   Nc	           
        t         j                  j                  j                  |      }	t         j                  j                  j                  t	        |            }
|dk(  xsG t         j                  j                  | t        j                         xr |dvxr t        j                  }t        |	      rt        |
      st        j                  dfS |dk(  rt        j                  dfS t        j                  |       }|j                  }d}|rat!        j"                  t         j$                  j&                  | d      }t!        j"                  t         j$                  j&                  | d      }n	 	 	 	 	 	 dd	}|}|
dk(  r ||	|
      }|dk(  rt        j(                  |fS |t+        |t,              rt/        j0                  t2        d
d      5  t5        |      \  }}d d d        hft         j                  j                  j                  t	        ||z               }|	|k(  r,t6        j9                  d|||||       t        j(                  dfS t        j(                  |fS |	|k  s|
|dz  dz  k\  rt        j                  dfS t;        | |||||dk7  r|nd|t        j                        }dd} ||      \  }}|r ||      \  }}t=        |      dk(  rt        j                  dfS t?        j@                  |jC                         |jE                               \  \  }}}d}d}|D ]  }t         j                  j                  jG                  ||      }t         j                  j                  jI                  ||tK        |jM                                     } tO        d | D              }!|!r|dz  }|dz  } ||kD  rt        j(                   ||	|
      fS t        jP                   ||	|
      fS # 1 sw Y   xY w)Nscanr  rA   r      T)inner_reductionFc                     yr  r   )reduction_numel_hint
numel_hints     r   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splits7  s     r   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr&  r   r  rQ  c           	     2  	 | j                         }|J t        d t        || j                         | j	                               |       }|j                         }|j                  J |j                  D  cg c].  } t        | t              rt        | t        j                        s| 0 }} g }d}t        |j                  d       D ]  	t        	fd|D              s|j                  	j                         	j                   t"        j$                  j&                  v sZt"        j$                  j&                  	j                      }t)        |j*                  dd       }|j-                          t)        |j*                  dd       |k7  sd} ||fS c c} w )	Nr   r   r   r   rC  rB  Fc                    | j                   S r   r  r   s    r   <lambda>z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s
    aff r   keyc              3  N   K   | ]  }|j                   j                  v   y wr   )r   r1   )r   r  mds     r   r   zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>  s      FaqBHH111Fs   "%r   T)r  rx  r  r  r   r  
range_varsr   r    r   Numbersortedr  r   r  r   r   rn   r   name_to_bufferr   rC  decide_layout)
r  r   cbread_writesrd  indiceschangedbuforiginal_striderc  s
            @r   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indiceso  sh   \\^F%%%%!++-
 B ,,.K ))555 %//a&z!U\\/J J 
 GG[..4DE +F:FFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G+ G##!s   83Fr   c              3  4   K   | ]  }|d k(  xs |dkD    yw)r   rA   Nr   r   s     r   r   z'Reduction.num_splits.<locals>.<genexpr>  s     9AQ!a%9r  )rX  r   rY  r   r   r   )r  r9  r   ztuple[Sequence[Expr], bool]))rn   r   r   r4  ri   has_featurerD   REDUCE_TO_SINGLE_ELEMENTrB   split_reductionsr   rU   rR  rT   r  multi_processor_count	functoolsr   choicesreduction_split_factorINNERr   r   r   r   r  rK   logr  r9  r   rC   index_vars_squeezer   r)  simplify_with_rangesstride_hintsr   keysr   OUTER)"r   	dst_dtyper;  r  r  r:  r&  reduction_numel
input_noderX  rY  should_splitpropsnum_smmin_elements_per_threadrZ  outer_reduction_splitsr  
new_rangesnew_reduction_rangesextracted_numel_hintr  ro  rk  rl  r   rI  ranges1	num_outer	num_innerr   jrJ  outers"                                     r   
num_splitszReduction.num_splits
  s
     !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /0Z
5K ((!++U" ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K\\.2BDI H <JG",H
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- $;;VaZ"_, ((!++--;v-E>5(00	
!	$F ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		 	A  55aAAgg&&33>4#7G
 999EQ	Q		 y  &&(>$j)   !&&(>$j)  UH Hs   O++O5c                     t         j                  j                  j                        t	        ||      dfd|dv r1t        t        j                              	 	 	 	 	 	 d fdfdS  S )z1Convert inner_fn from a reduction to an pointwisec                     t        j                   fdt        j                  D cg c]  }t	        |       c} D              S c c}w )Nc              3  0   K   | ]  } |        y wr   r   )r   rN  r   value_fns     r   r   z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s        UF+   )ru  reduce	itertoolsproductr   )r   r   
combine_fnr:  r  s   ` r   r   z*Reduction._unroll_reduction_fn.<locals>.fn  sH    ##"+"3"3,<=q%(=# 
 >s   A
r  r  c                    |D cg c]  }t        j                  |       }} | |      t        j                   |      t        j
                        fS c c}w r   )r   expandrl   
index_exprr  int64)r   rN  r   flatten_indexr  s      r   r  z0Reduction._unroll_reduction_fn.<locals>.value_fn  sP     4::a%,,q/::UF+NN=#8%++F  ;s   Ac                     |       d   S r  r   )r   r   s    r   r_  z0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1 r   )r   rT  r   r   )r   rT  rN  rT  r   r5  )rn   r   r   guard_int_seqr7  _fixed_indexerr  r  )r  r:  r&  r;  r  r  r   r  s   ``  @@@@r   _unroll_reduction_fnzReduction._unroll_reduction_fn  s     77++99:JK-niH
		 11* 112BCM
)3E* .-HIr   c
                    t         j                  j                  j                  t	                    dk(  r]dfd}
 |
d       |
d       |
d       |
d      dv s
J  d       dfd}t
        j                  |||t        |            S dk(  r+dv rdfd	}ndfd
}t
        j                  |||      S t        t              rt         j                  j                  j                        t        j                  k  rSt	        |      dk7  st        |j                        r0dk7  r+t
        j                  || j                  |      |      S | j!                  ||||		      \  }}dfd} ||      }|t"        j$                  k(  r|}|dk(  rX|	J t'        j(                  t*        dd      5  t-        |	      \  }}ddd       J J | j/                  ||||||
      S |dkD  r| j1                  ||||||	
      }d}t        j2                  j4                  r t        |t6              r	 	 	 	 dd} ||      }|rht        |j8                  t:              sJ t        |j8                                |j8                  j<                  d   |_        |_         ||_!        |_"        |S t6        j                  t;        ||||            }|S # 1 sw Y   xY w)zy
        Create a reduction node. May split the reduction to multiple layers to expose
        more parallelism.
        r   c                   t         j                  k(  rt        |       S j                  r+t        | t              sJ t        |              t        |       S t        | t              sJ t        |              t        |       S r   )	r  r   r  r   r   r   r  r   r   )valr  s    r   py_cnstz!Reduction.create.<locals>.py_cnst  sg    

*9$00%c=9D49D9 :%%c;7BcB7s8Or   rA   )r  r  r
  r  z* not supported for zero-dimension tensors!c                6    t        j                           S r   rl   r  )r   r  r&  rtypes_to_initss    r   const_fnz"Reduction.create.<locals>.const_fn  s    ||ON$CYOOr   r  r  c                0    t        j                  d      S r  r  )r   r  s    r   r   zReduction.create.<locals>.fn  s    <<955r   c                n    D cg c]  }t         j                  j                   }} | |      S c c}w r   r   r  r  )r   r   reduction_indexr  r:  s      r   r   zReduction.create.<locals>.fn!  s1    =M&Nuww||&NO&N#E?;; 'O   !2r  c                `    t              r| S | dkD  rt        | t        j                        S | S r  )r   r  rB   min_num_split)r  r  s    r   _maybe_increase_splitz/Reduction.create.<locals>._maybe_increase_splitJ  s/    /*qy5&"6"677r   r&  Nr  Tc                :   | j                         }t        |      dk7  ry t        t        |            }|t        j
                  j                  vry t        j
                  j                  |   }t        |t              sy |j                  j                         J |S r  )r  r   nextiterrn   r   rg  r   rx  rB  r'  )cur_node
read_namesbufnamerm  s       r   _find_split_reductionz/Reduction.create.<locals>._find_split_reduction  s     "*!8!8!:J:!+#"4
#34Gagg&<&<<#''009C%c>:#88668DDDJr   rQ  )r  r   r   zUnion[bool, float, int])r   r   r   rm   )r  r   r   r   )r  r   r   zOptional[ComputedBuffer])#rn   r   r   simplifyri   r  r  r   r   r!   size_hint_or_throwrB   unroll_reductions_thresholdre   r   r  r  rU   rR  r   r   r  rK   !create_multilayer_existing_rangescreate_multilayerr  mix_order_reductionr   rB  r9  r:  _split_size_original_inner_fn_original_ranges_original_reduction_ranges)r  r   r  r;  r  r  r:  r&  r<  r  r  r  r   hintr  r  r  r  r   split_reductionr  r  r  s     ` ` ``             @@r   r  zReduction.create  s~   $ ''**33MBR4STa$ qz"1:
qz	O "_4 !""LM4P ##!F|	 $   a!556
< ##YF $  
 0  33OD001v&!+vfkk/B%' ##11.	  $   nn

e	 &e,
 ]222!NB;)))n.>E 3V40
0 )))'33388 $  QY'' C #O}}00ZY5O'-$ #8"< "/"6"6	B O0012B />.B.B.S.STU.V+5=2390=M:J!!1-#-	
 
k s   K33K=c           
        | dv rAt        |      rt        d      S t        |      ryt        j                  |      j
                  S | dv rAt        |      rt        d      S t        |      ryt        j                  |      j                  S t        |      rdnd}t        |      rdnd}||||||||f|||ft        d      |fd	|    S )
N)r  r  z-infF)r	  r  infTr   rA   )r  r
  r  r  r  welford_reducer(  online_softmax_reduce)r,   r  r+   r  iinfor	  r  )r&  r   zeroones       r   default_accumulatorzReduction.default_accumulator  s     ..e$V}$!%({{5)---..e$U|#!%({{5)---(/uQ&u-d1#T40 $dD1&+FmT%:	
 	 		r   c                :    | dk(  ryt         j                  | |      S )Nr  r   )r9  r  r&  r   s     r   default_valuezReduction.default_value  s#     --,,^UCCr   c                    | dk(  r|S | dk  r(|dk  r#|t         j                  k(  rt         j                  S | dk  r(|dk  r#|t         j                  k(  rt         j                  S |S )Nr&     i      )rU   r~  
OUTER_TINY)r  rY  r<  s      r   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++r   c                z   |yt         j                  j                  j                  |j	                         |      sy|j                          	 t        |       |j                         }t        |dd       D ]3  \  }}t         j                  j                  j                  |d      s1|c S  y# t        $ r Y yw xY w)z
        If we are reducing over the full tensor, and it is non-dense in the last dimension,
        reindex so we reduce over the dense dimension. initially just handle complete
        reduction case
        Nr&  rA   )
rn   r   r   r3  r  r  rD  r  r(  r   )r  r  r  rJ  r   r   s         r   $check_for_split_dense_dim_reindexingz.Reduction.check_for_split_dense_dim_reindexing  s     ww77  "O
 	!*- '')gcrl+ 	DAqww771=	  # 		s   B. .	B:9B:c                
  
 | j                  |      }t        j                  |g|      t        j                  j
                  j                  t        j                  |z  d             
	 	 	 	 	 	 d
fd}	|	S )Nr   c                   |\  }| ^ }|z  |z   d
fd}r`t              }t        j                  t        j                  |      t        j                  |            }t        j                  ||	      S  |       S )Nc                 $       g            S r   r   )rk  r  	new_indexr   s   r   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body(  s    i');<<r   )r   rm   )r_   rl   r  r  masked)r   r  reduction_blockr  index_dtyper   rk  r  
block_sizedefaultr  	need_maskr  r   s         @@r   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn!  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vr   )r   Sequence[Symbol]r  r  r   rm   )	r  Viewdynamic_reshape_indexerrn   r   r   r  r   r$  )r  r  r:  r  r  r  r  r  dense_indexr  r  r   s    ` ` ``   @@r   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loader  s     >>Z
 ../
 ((>>HH_u,a0
 
		#	6F		 	( r   c                    t        d D              s
J d       t        j                  |t        |      t        |      z         	 	 	 	 	 	 dfd}|S )Nc              3  &   K   | ]	  }|d k(    yw)rA   Nr   r   r  s     r   r   zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>@  s     3a163s   z8Only enabled for numel_hint == 1, found original_ranges=c           	         | d t               }| t              d  } | t        |      t        |      z               S r   )r   r   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       r   r  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fnG  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF r   )r  rE  r  rE  r   rm   )r   r  r  r   )r  r  r  original_reduction_rangesr  r  r  r   s    ``    @r   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges7  sy     3?33 	
G6HI	
3 ..%uZ'85AU;V'V
		(		!/		 		 r   c                   |t         j                  t         j                  fvr|nt         j                  }t        j                  |||||||	|      }|j                          |j                         	 	 	 	 	 	 dfd}t        j                  j                  j                  t        |            }| j                  |
||      }||dt        |       k(  sJ t        j                  t	        |||||t        |      d |	||            S )a
        Break a large reduction up into multiple smaller reductions
        recursively
        c                     g | |      S r   r   )r   r  intermediate_loaders     r   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fn|  s     ''A'A'ABBr   NrQ  )r   rT  r  rT  r   rm   )r  float16bfloat16r  r9  r  r  r  rn   r   r   optimization_hintri   r  r   r   )r  r   r  r;  r  r  r  r  r  r&  r  r<  intermediate_dtypeintermediater  rY  r  s                   @r   create_multilayer_helperz"Reduction.create_multilayer_helperT  s(   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%77o8VW
99:~
 *-Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
r   c                    t        |      }t        ||dz
  z   |      }| j                  ||      }| j                  |||||||
      }| j	                  ||||||g |||g|||	      S )r  rA   )ri   r=   r  r  r  )r  r   r  r;  r  r  r:  r&  r  r<  r  r  r  r  r  s                  r   r  zReduction.create_multilayer  s    & ((89o;UC
##NI>00

 ++feL
 	
r   c                j    | j                  |||||      }| j                  ||||||g ||||	d|
      S )r  r&  )r  r  )r  r   r  r;  r  r  r  r  r  r&  r<  r  s               r   r  z+Reduction.create_multilayer_existing_ranges  sc    $ @@% 

 ++%+o+
+ 
 	
r   rU  rZ  r  rD  rH  )
r  rI  r  r  r  rE  rI  r  r   r   rc  r   zSequence[Sequence[Expr]]rf  r   )r   rN  r  r@  r;  r@  r  zCallable[_P, OpsValue]r  rT  r:  rT  r&  z%Union[ReductionType, Literal['scan']]r  r    r  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
r  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r:  rT  r&  r   r;  r@  r   z(Callable[[Sequence[_IntLike]], OpsValue])r   rN  r  r@  r;  r@  r  r  r  rE  r:  rE  r&  rQ   r<  rU   r  r  r   r   r&  r   r   r@  r   #Union[_NumLike, Sequence[_NumLike]])r  rz   rY  r   r<  rU   r   rU   )r  rz   r  r  r   rW  )r  Callable[..., OpsValue]r:  rT  r  rz   r  rz   r  rz   r  r  r  r  r   Callable[..., object])r  4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r  rE  r  rE  r  Sequence[Integer]r  r  r   z@Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue])r   rN  r  r@  r;  r@  r  r  r  rE  r  rE  r  
list[Expr]r  list[Integer]r&  rQ   r  rz   r<  rU   r   r   )r   rN  r  r@  r;  r@  r  r  r  rE  r:  rE  r&  rQ   r  rz   r<  rU   r  r  r   r   )r   rN  r  r@  r;  r@  r  r  r  rE  r  rE  r  r
  r  r
  r&  rQ   r<  rU   r   r   ) r   r   r   r   r  r  rY   r%  r)  r'  rH  rK  r  r  r/  rk  r  r  r  rU   rR  r  r  r  r  r  r  r  r  r  r  __classcell__rB  s   @r   r9  r9    s   ((!!!!N HK(
 )

%#L"L 3L 	L
 )L 
L=


  (,ggg g )	g
 #g -g >g g %g 
(g gR )N),) ) 	)
 
2) )V  )6(=(='+OO O 	O
 %O O )O &O &O %O 
O Ob $/	, > DD$/D	,D D %(:G	   &4D	 >  (,('( -( "	(
 ( ( 5( %( 
( (T D ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
=
 =
~  (,+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 %+
 
+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
$
 $
r   r9  c                     d fd}|S )1A closure containing math to read a given elementc                    t        |       t              k(  sJ t        |       t              k(  sJ }t        |       D ]  \  }}}|dk7  s|||z  z   } |S r  )r   r   )r   resultr   stszrF  r   r   s        r   r  z_fixed_indexer.<locals>.indexer  ss    !c%jCK&???5zSY&&&ufd3 	+KCRQw#(*	+ r   )r   r\  r   r   r   )r   r   rF  r  s   ``` r   r  r    s     Nr   INNER_FN_TYc                  l     e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 	 	 	 	 	 	 ddZ xZS )MultiOutputReductionr   output_indexc
           
         t              rft              dk(  rd   }
n	 	 	 	 	 	 dfd}
t        |   |||
|||||       |	| _        y )NrA   r   c                2     t         fdD              S )Nc              3  0   K   | ]  } |        y wr   r   )r   r   r   reduction_idxs     r   r   z@MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HR]3Hr  )r   )r   r  	inner_fnss   ``r   r  z-MultiOutputReduction.__init__.<locals>.loader  s     HiHHHr   rQ  )r   rE  r  rE  r   ztuple[OpsValue, ...])callabler   r@  __init__r  )r  r   r  r  r  r:  r&  r;  r<  r  r  rB  s      `       r   r  zMultiOutputReduction.__init__  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )r   c           	     :   t        j                  | j                  | j                  | j                  | j                  ||            }t        |t        t        f      sJ t        |             || j                     }t        j                  |xs d ||      |      S r  )rl   rG  r   r;  r&  r  r   r   r   r   r  rH  )r  r  r  r  rI  r   r  s          r   rH  z$MultiOutputReduction.store_reduction(  s     JJNNMM$/	
 &5$-0>$v,>0t(()"";#;)WT]ERRr   )r   rN  r  r@  r  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r  r  r:  r  r&  rQ   r;  r@  r<  rU   r  r   )
r  rI  r  r  r  rE  rI  r  r   r   )r   r   r   r   r  rH  r  r  s   @r   r  r     s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
Sr   r  c                  ^    e Zd Zeej
                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)OnlineSoftmaxReductionNc
           	         t        fdt        |      D              }
|
D ]  }|j                           |
S )z>
        Create the reduction disregarding splitting.
        c              3  j   K   | ]*  }t         j                  t        d |	             , yw)r  N)r   r  r  )	r   
output_idxr   r  r  r  r<  r:  r;  s	     r   r   z0OnlineSoftmaxReduction.create.<locals>.<genexpr>K  sI      
  $$+"

s   03)r   r   r  )r  r   r  r;  r  r  r:  
num_outputr<  r  resultsr  s    `````` `   r   r  zOnlineSoftmaxReduction.create;  sG       
 
 $J/
 
   	AIIK	r   )r   rN  r  r@  r;  r@  r  r  r  rE  r:  rE  r$  r   r<  rU   r  r  r   Sequence[TensorBox])r   r   r   r  rU   rR  r  r   r   r   r   r   :  s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
! !r   r   c                      e Zd Zeej
                  f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z	y)WelfordReductionc                   |dv sJ t         j                  j                  j                  t	                    }d
fd}	|dk(  r |	d      }
 |	d      } |	d      }|
||fS |dk(  r@	 	 	 	 dfd|dk(  r |d          |	d       |	d      fS t        fd|D              S t        j                  |d   ||      \  }}|t        j                  k(  r|}|dkD  r| j                  ||||      S t        d	      D cg c](  }t        j                  t        ||||	            * }}|D ]  }|j                           |S c c}w )N)r  r(  c                X     d fd}t         j                  |t                    S )Nc                0    t        j                        S r   r  )r   r   r  s    r   r  z8WelfordReduction.create.<locals>.const.<locals>.inner_fnq  s    || r   r  r   rE  r   rm   r  r  r   )r  r  r   r   r  s   ` r   constz&WelfordReduction.create.<locals>.constp  s2     ##!F|	 $  r   r   rA   c                X     d fd}t         j                  |t                    S )Nc                n    D cg c]  }t         j                  j                   }} | |      S c c}w r   r  )r   r   r  r  r:  s      r   r  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fn  s1    =M&Nuww||&NO&N!#77 'Or  r  r,  r-  )r  r  r   r   r  r:  s   ` r   copyz%WelfordReduction.create.<locals>.copy  s2    8 !''!%<	 (  r   r  c              3  .   K   | ]  } |        y wr   r   )r   r   r1  s     r   r   z*WelfordReduction.create.<locals>.<genexpr>  s     :"T"X:   )r&  r  r   )r  r   r   r   )r  r  r   r   )rn   r   r   r  ri   r   r9  r  rU   rR  r  r   r   r  r(  r  )r  r   r   r  r  r:  r&  r<  r  r.  meanm2weightr  r  r#  r%  r  r1  s    `` ``            @r   r  zWelfordReduction.createa  s    !FFFF''**33MBR4ST	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
e ]222!N19(( 	 	2 $Ah
   $""

 
   	AIIK	%
s   -Ec                     y)N)r   r   r   r   r  s     r   r  zWelfordReduction.default_value  s     r   c	                    t              t        j                  j                  j	                  t        j                  z  d             }	|	rH|dk7  rC	 	 	 	 	 	 	 	 d
fd}
 j                  ||d   t        |
d      t        |
d      f|d|      S t        dz
  z         t        j                  |t         fd|D              g |g||      }|D ]  }|j                           	 	 	 	 	 	 	 	 ddt        j                  j                  j                  t        |            } j                  ||      }t        j                  |t        fd	|D              |gd|      S )r  r   r(  c                0    t        j                  |      S r   r  )r   r  r  r   s      r   r  z4WelfordReduction.create_multilayer.<locals>.constant  s     ||E511r   r  rA   )r   r   r  r  r:  r&  r  r<  c           	   3  L   K   | ]  }j                  |d         yw)r   )r  N)r  )r   r  r  r  r  r:  r  s     r   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>	  s=      
  ++$# , 
s   !$c                     |g | |      S r   r   )r   r  r  s      r   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn$	  s    
 4E4O455r   c              3  T   K   | ]  }t        |j                                 ! yw))r  N)r   r  )r   r   r<  s     r   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>2	  s*       .q}}GG   %()r   rE  r  rE  r  r   r   rm   )r   rE  r  rE  r  rP  r   rm   )ri   rn   r   r   r  r   r$  r  r   r=   r(  r  r   r  r   r  )r  r   r   r  r  r:  r&  r  r<  r  r  intermediatesr   rY  r  r<  r  s   ` `  ` `      @@@r   r  z"WelfordReduction.create_multilayer  s     ((89((>>HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 (
 
 feL#
&  	AIIK		6!	6+	6 9	6 		6 WW%%//f0EF
99:~
  && &  G
 	
r   N)r   rN  r   r@  r  Sequence[Callable[..., Any]]r  r
  r:  r
  r&  rQ   r<  rU   r   r&  r  )r   rN  r   r@  r  r@  r  r
  r:  r
  r&  rQ   r  rz   r<  rU   r   r&  )
r   r   r   r  rU   rR  r  rk  r  r  r   r   r   r(  r(  `  s    )6(=(=vv v 0	v
 v (v &v &v 
v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
Z
 Z
r   r(  c                  x    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<    ed       dd fd       Zd  fdZ	 	 	 	 	 	 	 	 	 	 d!dZd"dZd#dZ	d#dZ
d#dZd$dZd%dZdddZeej"                  fdd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Z xZS )(Scanr
  scan_rangesr   =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rU   r<  r   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                    t         |          t               j                  fd| j                  D         z   t               j                  fd| j
                  D         z  S )Nc              3  6   K   | ]  }t        |        y wr   r'   r  s     r   r   z,Scan.get_free_symbol_uses.<locals>.<genexpr>T	       O"1m4Or  c              3  6   K   | ]  }t        |        y wr   r'   r  s     r   r   z,Scan.get_free_symbol_uses.<locals>.<genexpr>W	       H"1m4Hr  )r@  r%  r:   r  rC  r   rA  s    `r   r%  zScan.get_free_symbol_usesL	  sa     G(7 jl  Od>N>NO !jl  HdiiH		
r   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r   )r   r  rC  r   r@  r  r  rB  s    r   r  zScan.__post_init__[	  =    4;;#d&6&6"773tyy>IIIr   c                   | j                  ||      t        fd| j                  D              }t        j                  | j
                  | j                  |      }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wr   r   r   r  r   s     r   r   z'Scan.store_reduction.<locals>.<genexpr>g	       Dx}Dr3  r  )	r   r   r  rl   rT  rF  r  r  r  )r  r  r  r  	scan_varsr   r  r   s          @r   rH  zScan.store_reduction_	  so     ll4+DT^^DD$++t?yy$9gclF4;L;L4M
 	
r   c                     y)Ncustomr   r  s    r   r'  zScan.get_reduction_typem	  s    r   c                    | j                   S r   )rC  r  s    r   r)  zScan.get_reduction_sizeq	  r  r   c                    | j                   S r   r   r  s    r   r   zScan.get_sizet	      yyr   c                    | j                   S r   r  r  s    r   r  zScan.get_pointwise_sizew	  r  r   c                X    t        | j                        t        | j                        z   S r   )r   r  rC  r  s    r   rK  zScan.index_lengthz	  !    4;;#d&6&6"777r   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r   )r  r  rC  r@   rM  r   r  r   rN  r   s       r   r  zScan.inner_fn_args}	  E    DKK(T--t}}=ll5&)vr   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  ||      S r  )r  r  rC  r@   rM  r   rJ   r  r  r^  r   rN  r   s        r   r  zScan.inner_fn_free_symbols	  Q    DKK(T--t}}=ll5&)#DMM3mTTr   T)can_fallback_to_atenc                  g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t        |      dkD  r=t         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t        j                  |||   ||   |      # c}S | j!                  ||d   |d   ||      \  }}t"        }|dkD  r[t$        j&                  j(                  d u xs t*        xr	 t,        dk\  xr t        |      dk(  }|s|rd gt        |      z  S d}nt.        }dfd}t        t        |            D cg c]0  }t0        j                   |d	|||   |||   ||||||d|	      2 }}|D ]  }|j3                           |S c c}w c c}w )
NrA   r  r   )r   r   r  axispointwise_rangesrC  r  
scan_numelz3.3.0c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r   r   )r   
scan_indexrf  rg  rC  s     r   r   zScan.create.<locals>.reindex	  S    z?c+&6666u:%5!6666>U5D\>J>tu>>r   )r   r   rF  r  r  r   r  rC  r  r   r<  r  )r   rE  rk  rE  r   r	  r   )rn   r   rq  rD   SCANr   TUPLE_REDUCTIONr   r  ri   r  r   r+  r   r  r  r  rB  r  versionhip
has_tritontriton_version	SplitScanr   r  )r  r   rF  r  r   rf  r  r<  rd  r   r   rh  r  r  	scan_typesupports_splitr   r%  r  rg  rC  s        `             @@r   r  zScan.create	  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''v;?177#6#6N22$
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*AB %*#f+$6 !   ! .&|4	 !   &)^^)q\-#!! &4 	&
"
 	> !!T)Wj.V^w=V% v;!#  "' 6CK//!"J%		?. !&c&k 2%
$ #  ! .!&|4'+ +)##1!- 
 
*  	FNN	 AP
s   )&I85Ic	           
     N    dfd}	t         j                  ||||	||d|      S )Nc                ,     g | d  || d        S r   r   )r   r  rf  r  s     r   r  z#Scan.num_splits.<locals>.wrapper_fn	  s*    Fc%4jF=F3tu:FGGr   rT  )r   r  r;  r  r  r:  r&  r  )r   rE  r  rE  r   rm   )r9  r  )
r  r   r   r  rf  rg  rC  r  rh  r  s
      ``     r   r  zScan.num_splits	  s;    	H ###(!& $ 	
 		
r   rZ  r  r8  )
r  rI  r  z%Callable[[Sequence[_IntLike]], Never]r  rE  rT  r  r   r   rH  rD  rc  r  )r   rN  rF  rE  r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r   r
  rf  r   r  rD  r<  rU   rd  r   r   r   r   Sequence[Optional[TensorBox]])r   rN  r   r@  r  rP  rf  r   rg  r
  rC  r
  r  rD  rh  r    r   r  )r   r   r   r   rY   r%  r  rH  r'  r)  r   r  rK  r  r  r  rU   rR  r  r  r  r  s   @r   rB  rB  >	  s   
MMSS!!##-- F#
 $
 
"
 7
 	

 $
 

 8U  )6(=(=` &*`` (` ?	`
 ` ` R` &` #` ` 
'` `D 

 
 7	

 
 (
 #
 R
 
 
(
 
r   rB  c                      e Zd Zy)rs  N)r   r   r   r   r   r   rs  rs  	
  s    r   rs  c                  F    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<    ed       dd fd       Zd fdZ	 	 	 	 	 	 	 	 	 	 ddZd dZd!dZ	d!dZ
d!dZd"dZd#dZdddZeej"                  f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$d       Z xZS )%Sortr
  sort_rangesr   z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rU   r<  r   r  rE  rF  rG  r  r   stable
descendingc                    t         |          t               j                  fd| j                  D         z   t               j                  fd| j
                  D         z  S )Nc              3  6   K   | ]  }t        |        y wr   r'   r  s     r   r   z,Sort.get_free_symbol_uses.<locals>.<genexpr>$
  rJ  r  c              3  6   K   | ]  }t        |        y wr   r'   r  s     r   r   z,Sort.get_free_symbol_uses.<locals>.<genexpr>'
  rL  r  )r@  r%  r:   r  r|  r   rA  s    `r   r%  zSort.get_free_symbol_uses
  sa     G(7 jl  Od>N>NO !jl  HdiiH		
r   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r   )r   r  r|  r   r@  r  rN  s    r   r  zSort.__post_init__+
  rO  r   c                ,   | j                  ||      t        fd| j                  D              }t        j                  | j
                  || j                  | j                        }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wr   r   rR  s     r   r   z'Sort.store_reduction.<locals>.<genexpr>7
  rS  r3  r  )
r   r   r  rl   sortrF  r}  r~  r  r  )r  r  r  r  rI  r   r  r   s          @r   rH  zSort.store_reduction/
  su     ll40DT^^DD$++vt{{DOOLyy$9gclF4;L;L4M
 	
r   c                     y)Nr  r   r  s    r   r'  zSort.get_reduction_type=
  s    r   c                    | j                   S r   )r|  r  s    r   r)  zSort.get_reduction_size@
  r  r   c                    | j                   S r   rY  r  s    r   r   zSort.get_sizeC
  rZ  r   c                    | j                   S r   r  r  s    r   r  zSort.get_pointwise_sizeF
  r  r   c                X    t        | j                        t        | j                        z   S r   )r   r  r|  r  s    r   rK  zSort.index_lengthI
  r]  r   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r   )r  r  r|  r@   rM  r   r_  s       r   r  zSort.inner_fn_argsL
  r`  r   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  ||      S r  )r  r  r|  r@   rM  r   rJ   r  rb  s        r   r  zSort.inner_fn_free_symbolsR
  rc  r   c	                   g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }d}t        j                  j                  xr% |
j                  t        j                  ||            }|sd gt        |      z  S t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t         j#                  |||   ||   |      # c}S dfd}t        t        |            D cg c]4  }t$        j#                  t'        d|||   |||   |||||||d|	      6 }}|D ]  }|j)                           |S c c}w c c}w )NrA   r  r  c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r   rj  )r   
sort_indexrf  rg  r|  s     r   r   zSort.create.<locals>.reindex
  rl  r   )r   r   rF  r  r  r   r  r|  r   r<  r  r}  r~  )r   rE  r  rE  r   r	  r   )rn   r   rq  rD   SORTr   r   r  ri   rB   r  persistent_reductionsr  r   r+  r   r  r  r   r{  r  )r  r   rF  r  r   rf  r}  r~  r<  r   r   
sort_numel
max_rblockis_persistent_kernelr  r   r%  r  rg  r|  s        `            @@r   r  zSort.createX
  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''77##&&}['AB
 
MM// Q..uxx
J/OP 	 $6CK''6{c)n,,, ))%((:q*AB %*#f+$6 !   ! .&|4	 !  	?0 !&c&k 2'
& %  ! .!&|4'+ +##1!-!) 
 
,  	FNN	 Q
s   2&G99GrZ  r  r8  )
r  rI  r  rR  r  rE  rI  rE  r   r   rH  rD  rc  r   )r   rN  rF  rE  r  z'tuple[Callable[[list[Expr]], Any], ...]r   r
  rf  r   r}  r   r~  r   r<  rU   r   r   r   rx  )r   r   r   r   rY   r%  r  rH  r'  r)  r   r  rK  r  r  r  rU   rR  r  r  r  s   @r   r{  r{  
  s>    
GG!!##--L F#	
 $	
 
"
 2
 	

 '
 

 8U  )6(=(=LL (L ;	L
 L L L L &L L 
'L Lr   r{  c                >    	 t        | d       y# t        $ r Y yw xY w)NFfreezeT)rD  r  r   s    r   r  r  
  s&    a. s    	c                    	 t        | d      \  }}|j                         r|j                          |j                         S # t        $ r Y yw xY wNFr  )rD  should_pad_stridespad_stridesis_contiguousr  )r   _bufferrC  s      r    is_contiguous_storage_and_layoutr  
  sR    /%@ $$& ##%% s   ?A 	AAc                   t        | t              rt        | j                  |||||      S t        | t              r:t        | j                  |||||      \  }}| | j                  j                         fS t        | t              r|rn|r0| j                          | j                         j                         s>J || j                  ||       n&|| j                  ||       n| j                          t	        |       | j                         fS t        | t              r(t        | j                  |      \  }}|| j                  fS t        )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    r  want_contiguousstride_orderr  r  r  r  )r   r   rD  rB  rf  r  rg  r	  r  r  r  rh  rH  rC  r  )	r   r  r  r  r  r  r   rC  buffers	            r   rD  rD  
  sG    !Y$FF+%''
 	
 !Z )FF+%''
	6 !&&##%%%!V!||~33555)11  2  *22! 3  !!}alln,,!_% *FF
	 qxx
r   c                d    	 t        | d      \  }}|j                  |      S # t        $ r Y yw xY wr  )rD  is_stride_orderedr  )r   r  r  rC  s       r   "is_stride_order_storage_and_layoutr  
  s:    /%@''55 s    # 	//c                   t        | t        t        f      rt        | j                        S t        | t
              rt| j                  }t        j                  j                  j                  |j                  t        |j                        z  t               }t        | j                        xs |S t        | t              r*| j!                         t        j                  j"                  v S yr  )r   r   rf  is_unalignedrB  rH  rC  rn   r   r   statically_known_multiple_ofrF  r`   r   rb   rg  rh  unaligned_buffers)r   rC  has_unaligned_layouts      r   r  r    s    $J/0DII&&$(#$77#3#3#P#PMMN6<<88/$
  
 DII&>*>>$}}!''";";;; r   c                      e Zd ZU ded<    ed       ddd       ZddZddZddZe	dd       Z
ddZd d	Zd!d
Zd"dZd#dZd$dZd%dZd&dZd'dZd(dZd%dZd%dZd)dZd*dZd+dZd,dZy)-rd  r   rB  c                8    | j                   j                  |      S r   rB  r%  r$  s     r   r%  zBaseView.get_free_symbol_uses  s    yy--m<<r   c                    t        d|        )Nzmake_reindexer NYI on ru  r  s    r   make_reindexerzBaseView.make_reindexer  s    !$:4&"ABBr   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S r   r   r   innerr   s    r   r  z&BaseView.make_indexer.<locals>.indexer#      &&r   )r   rE  r   r    )rB  r  r  )r  r  r  r   s     @@r   r  zBaseView.make_indexer  s/    		&&(%%'	' r   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S r   r   r  s    r   r  z$BaseView.make_loader.<locals>.loader,  r  r   r,  )rB  r  r  )r  r  r  r   s     @@r   r  zBaseView.make_loader(  s/    		%%'%%'	' r   c                6    | j                   j                         S r   )rB  r  r  s    r   r   zBaseView.dtype1  s    yy""$$r   c                6    | j                   j                         S r   rB  r  r  s    r   r  zBaseView.get_layout5      yy##%%r   c                6    | j                   j                         S r   rB  r  r  s    r   r  zBaseView.get_device8  r  r   c                     y r   r   r  s    r   r  zBaseView.get_origin_node;  r  r   c                6    | j                   j                         S r   rB  rh  r  s    r   rh  zBaseView.get_name>      yy!!##r   c                "    | j                         S r   r  r  s    r   r  zBaseView.get_pointwise_sizeA      }}r   c                8    | j                   j                  |      S r   rB  r  r  s     r   r  zBaseView.mark_reuseD      yy##E**r   c                6    | j                   j                         S r   rB  r  r  s    r   r  zBaseView.has_exceeded_max_readsG      yy//11r   c                6    | j                   j                         S r   rB  r  r  s    r   r  zBaseView.realizeJ      yy  ""r   c                8    | j                   j                          y r   rB  r  r  s    r   r  zBaseView.realize_hintM  s    		 r   c                6    | j                   j                         S r   rB  r"  r  s    r   r"  zBaseView.get_storage_numelP      yy**,,r   c                6    | j                   j                         S r   rB  r+  r  s    r   r+  zBaseView.is_externS      yy""$$r   c                    t        | j                  t              sJ t        | j                               | j                  j	                         S r   )r   rB  rd  r   is_module_bufferr  s    r   r  zBaseView.is_module_bufferV  s6    $))X.?TYY?.yy))++r   c                6    | j                   j                         S r   rB  r  r  s    r   r  zBaseView.get_read_namesZ      yy''))r   c                    t        j                  t        dd      5  t        | j	                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )r   r   r  rL   r  r   r  r  s    r   r  zBaseView.get_reads]  sL    \\.*:DA 	&  " e		 	 	s   2AA!c                d    | }t        |t              r|j                  }t        |t              r|S r   )r   rd  rB  )r  r   s     r   re  zBaseView.unwrap_viewd  s+    H%A H%r   c                    | j                         } t        j                  t        d|      |      }t	        || j                         || j                               S r  )r  r   r   r  r  r  r   r  s      r   r/  zBaseView.constant_to_devicej  sP    !!#Hn.?HP.."==?	
 	
r   NrZ  r  r   *Callable[[Sequence[Expr]], Sequence[Expr]]rQ  rO  r?  rA  rK  r;  rU  rD  rX  rC  rH  r8  rd  r9  rb  rY  rf  )r   r   r   r   rY   r%  r  r  r  rn  r   r  r  r  rh  r  r  r  r  r  r"  r+  r  r  r  re  r/  r   r   r   rd  rd    s    
LJ'= (=C % %&&$+2#!-%,*	
r   rd  c                  P    e Zd ZU ded<   edd       Zed	d       Zd
dZ	 	 ddZ	y)r   rE  r   c                   t         j                  j                  }|D cg c]  }t        j                  |       }}| j                         }dgt        |      t        |      z
  z  t        |      z   }t        |      t        |      k(  sJ t        t        |            D ]  }||   dk(  r||   J ||   ||<   ||   ,t         j                  j                  j                  ||         rM||   }||   }|J |J ||z
  }|j                  |d      dk(  r{J d| j                          d| d|         |S c c}w )zReplace `-1` with correct sizesNr&  r   fallbackzBroadcast failed in ExpandView(, z) on dimension )rn   r   r   r   r  r   r   r   r   is_size_one_or_falser  )	r   new_sizer   r   old_sizer   v1v2diffs	            r   _normalize_sizezExpandView._normalize_sizez  so    77##-56ELLO66::<6S]S]:;d8nL8}H---s8}% 	A{b {...&qk!$(8(8(M(M)  a[a[~%~~%~Bw..!" /  	 6ajjl^2hZ_`^ab)	8 A 7s   D=c                   | j                  ||      }t        |      rt        |      \  }}t        |      t        |j                        z
  }|dk\  sJ t
        j                  j                  g|z  }t        |j                  |j                        D ]Y  \  }}|j                  t        j                  j                  j                  |      s|nt
        j                  j                         [ t        |j                   |j"                  t%        |      ||j&                  |j(                        }	t+        ||	      S t-        ||      S )Nr   rA  )rB  r   )r  r  rD  r   r   r   r  r  r   r   r  rn   r   r   r  rE  r   r   r   rF  rG  rH  r   )
r  r   r  rK  rL  skiprM  r   r   rN  s
             r   r  zExpandView.create  s   &&q(3 #"7":GZx=3z#77D199'',,$.J #J$5$5z G !!77++@@F  %!!  X!!$$J #
CCqx00r   c                    | j                   S r   rY  r  s    r   r   zExpandView.get_size  rZ  r   c                    | j                         }| j                  j                         t        |      t              z
  	 	 	 	 dfd}|S )Nc                    t        | d        } t        |       t              k(  sJ t        t                    D ](  }|   dk(  st        j                  j
                  | |<   * | S r  )r   r   r   r   r  r  )r   r   actualr  s     r   r   z*ExpandView.make_reindexer.<locals>.reindex  sf     tu&Eu:V,,,3v;' ,!9>$ww||E!H, Lr   r   rE  r   rE  )r   rB  r   )r  targetr   r  r  s      @@r   r  zExpandView.make_reindexer  sP     ##%6{S[(		!				 r   N)r   r   r  rT  r   rT  )r   r   r  rT  r   rd  rD  r  )
r   r   r   r   rk  r  r  r  r   r  r   r   r   r   r   v  sA    
# #J 1 14	3r   r   c                  P    e Zd ZU ded<   edd       Zed	d       Zd
dZ	 	 ddZy)PermuteViewr	  dimsc           
        | j                  |      }t        |      t        t        t        |                  k(  sJ t	        |      rt        |      \  }}t        |j                  |j                  |D cg c]  }|j                  |    c}|D cg c]  }|j                  |    c}|j                  |j                        }t        ||      S t        ||      S c c}w c c}w )NrA  )rB  r  )_map_neg_dimsr:   r   r   r  rD  rE  r   r   r   r   rF  rG  rH  r  )r  r   r  rK  rL  r   rN  s          r   r  zPermuteView.create  s      &$:eCI.>#???? #"7":GZ$!!  -12#2/34!""1%4!!$$J #
CC-- 34s   5CC#c                R    |D cg c]  }|dk\  r|nt        |      |z    c}S c c}w r  rj  )r  r  r8  s      r   r  zPermuteView._map_neg_dims  s)    @DEsaxSY_4EEEs   $c                   t        | j                  | j                              t        t        t	        | j                                    k(  sJ | j
                  j                         }| j                  D cg c]  }||   	 c}S c c}w r   )r:   r  r  r   r   rB  r   )r  r   r   s      r   r   zPermuteView.get_size  so    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   7Bc                L   t        | j                        D ci c]  \  }}||
 c}}t        t        | j                              D cg c]  }|   	 c}t	              t	        t        t        | j                                    k(  sJ 	 	 	 	 dfd}|S c c}}w c c}w )Nc                4    D cg c]  }| |   	 c}S c c}w r   r   )r   r   invs     r   r   z+PermuteView.make_reindexer.<locals>.reindex  s     '**E!H***s   r  )r   r  r   r   r:   )r  r   r  r   r  s       @r   r  zPermuteView.make_reindexer  s     !*$)) 451q!t5$S^45!s1v5#*U3tyy>-B"CCCC	+!	+	+
  65s   BB!N)r   r   r  r\  r   rd  )r  r\  r   	list[int]rD  r  )	r   r   r   r   r  r  r  r   r  r   r   r   r  r    sB    
. .$ F F,	3r   r  c                  F    e Zd Zedddd       Ze	 	 	 	 dd       ZddZy)	SqueezeViewN)r8  c          	     D   t        |      rOt        |      \  }}g }g }|?t        |t              sJ t	        |             d|k  r|t        |j                        k  sJ t        t        |j                  |j                              D ]  \  }\  }}	|Mt        j                  j                  j                  |      r5|j                  |       |j                  |	       X||k7  r#|j                  |       |j                  |	       |dk(  rJ d        t        |j                   |j"                  |||j$                  |j&                        }
t)        ||
      S |[t*        j-                  ||j/                         D cg c]-  }t        j                  j                  j                  |      s|/ c}      S |j/                         |   dk(  sJ t*        j-                  |t        |j/                               D cg c]  \  }}||k7  s| c}}      S c c}w c c}}w )Nr   rA   zexpected squeezed size to be 1rA  )r  rD  r   r   r   r   r   r   r   r   rn   r   r   r  r  rE  r   r   rF  rG  rH  r  r  r   )r  r   r8  rK  rL  r  rM  r   r   r   rN  r   s               r   r  zSqueezeView.create  s    #"7":GZHJ!#s+6T#Y6+CxC#joo*>$>>>%.s:??JDUDU/V%W K!>D&;77++@@F -"))&1Cx -"))&1#qyJ*JJyK %!!  !!$$J #
CC;;; ZZ\77++@@C   ::<$)));;q1::<1H"UAAQTH1"UVV #Vs    2H
=HHc                    | D cg c]
  }|dk7  s	| }}t        |       D cg c]  \  }}|dk7  s| c}}t        |       dfd}||fS c c}w c c}}w )NrA   c                    t        |       t              k(  sJ |  d        t        j                  j                  gz  }t	        |       D ]
  \  }}|||<    t        |      S )N )r   r   r  r  r   r   )r   r  r   r   lengthnot_ones       r   r   z%SqueezeView.squeezer.<locals>.reindex<  sk    u:W-C%'/CC-%*WW\\NV$;Igu- #Q!"	####r   )r   rE  r   ztuple[Expr, ...])r   r   )r   r   r  r   r   r  r  s        @@r   squeezerzSqueezeView.squeezer4  s_      $.!qAvA..!*4;AAF1;T	$    /;s   
AAAAc                    t        d      )Nzuse SqueezeView.create())AssertionError)r  rB  s     r   r  zSqueezeView.__init__E  s    788r   )r   r   r8  rW  r   r   )r   rE  r   z>tuple[list[int], Callable[[Sequence[Expr]], tuple[Expr, ...]]])rB  r   r   r   )r   r   r   r  r  rk  r  r  r   r   r   r   r     sC    7; +W +WZ !!	G! ! 9r   r   c                  l    e Zd ZU ded<   ded<   	 	 ddZddZddZeZe	 	 	 	 	 	 	 	 dd       Z	dd	Z
y
)GenericViewrE  r   r  r   c                    | j                   S r   )r   r  s    r   r  zGenericView.make_reindexerN  s     ||r   c                   t        t        | j                              D cg c]  }t        t        j
                  |       }}t        | j                  |            }ddj                  t        t        |             d| S c c}w )Nzlambda r  r  )r   r   r   rh   r@   r  r   r   r  r  r   )r  r  	index_old	index_news       r   reindex_strzGenericView.reindex_strS  ss    CHTYYCX
>?*4::q9
	 
 i01	3sI#6789+FF	
s   !Bc                z    | j                  | j                  d| j                   d| j                          g      S )Nsize=zreindex=)r  rB  r   r  r  s    r   r  zGenericView.__str__Z  s=    YY%		{+x8H8H8J7K-LM
 	
r   c                *     | |t        |      |      S )NrB  r   r   )r   )r  r   r  r   s       r   r  zGenericView.createa  s     X@@r   c                    | j                   S r   rY  r  s    r   r   zGenericView.get_sizej  rZ  r   Nr  rU  )r   r   r  rE  r   r  r   rd  rD  )r   r   r   r   r  r  r  r  r  r  r   r   r   r   r  r  I  sp    
77	3
G

 HAA !A <	A
 
A Ar   r  c                      e Zd ZdZedd       Zeed	d              Ze	 	 	 	 	 	 d
d       Z	e	 d	 	 	 	 	 	 	 dd       Z
e	 d	 	 	 	 	 	 	 dd       Zy)r  z
    This class handles tensor reshaping by computing appropriate index transformations
    to map the new shape back to the original storage layout.
    c                    t        j                  |       } t        j                  |      }t        j                  j                  j
                  j                  } |t        j                  | d            r| |z   } | S r  )r   r  rn   r   r   r   evaluate_exprLt)r   r   r  s      r   handle_negative_indexzView.handle_negative_indexu  sZ    ll3||D!((22@@#q)**C
r   c                    t        t              sJ t                      j                  |j	                               \  t
        j                  j                  j                        r|S t        t                    dkD  xs t        t                    dkD  t        |      }	 	 	 	 	 	 	 	 dd	 	 	 	 d fd}dv rdfd}  |t              |      S |r |t        j                              S t        |      s ||      S t!        |d      \  }}|j"                  }t
        j                  j                  j%                        }	t
        j                  j                  j%                  |      }
t
        j                  j                  j%                        }ddlm}  ||	|
|	      }||D cg c]9  }t+        |d
      r|j,                  j.                  nt1        j2                  |      ; }}t5        |j6                  |j8                  ||j:                  |j<                        }t?        ||      S  ||      S c c}w )Nr   c                    t        | d      \  }}t        |j                  |j                  |||j                  |j
                        }t        ||      S )NT)r  rA  )rD  rE  r   r   rF  rG  rH  )ra  r  rM  rK  rL  rN  s         r   create_reinterpret_viewz,View.create.<locals>.create_reinterpret_view  sX     #8T"RGZ$!!  !!$$J #
CCr   c                    r2t         j                  |       }  | t        j                              S j	                        } | t              |      S )z
            Handle the case where view is not possible with current strides.
            For unbacked symbols, make contiguous; otherwise use dynamic_reshape_indexer.
            r  )r  require_contiguousr  r  r  r   )r   r   r  r  r  r  unbacked_symbols_in_sizess     r   "handle_unbacked_or_dynamic_reshapez7View.create.<locals>.handle_unbacked_or_dynamic_reshape  s`     ) !33A6.x!B!B8!L  11(HEGADNGDDr   c                4    t        dgt              z        S r  )r   r   )r   r  s    r   fake_reindexz!View.create.<locals>.fake_reindex  s    aS3x=011r   r  Fr  )_compute_stridesize_obliviousr   rA  )ra  r   r  rE  rM  rE  r   rH  r   r   r   r   )r   r   r   ztuple[int, ...]) r   r   r   resolve_negative_sizer   rn   r   r   statically_known_list_equalsr   r2   r  r   r  r  r  rD  r   to_symints_or_intstorch._subclasses.fake_implsr$  r  r   exprr   r!   rE  r   r   rF  rG  rH  )r  r   r  r  r!  r#  rK  rL  
old_strideold_size_symintold_stride_symintnew_size_symintr$  new_stride_symintr   rM  rN  r  r  r   s   ` `              @@@r   r  zView.create~  sf    (H-=tH~=- 66qzz|XN( 77888LH %h/014 8(23a7 	" 9;	D	D#1	D?M	D	D	E	E	E 	E( =2 ADNLII *8^>>xH 
 %Q'5a88 4AeD&&
 ''**==hGGG,,??
K''**==hG@ ,4	
 ( +  'q&1u}}Q7GGJ 
 %!!  !!$$J #
CC 2!44!s   	>Ic                F   |D cg c]+  }t         j                  j                  j                  |      - }}| D cg c]+  }t         j                  j                  j                  |      - } }t	        |      }t        t        |            D ]J  }||   dk(  st        j                  j                  ||<   t        t        |       t        |            ||<    n t         j                  j                  j                  t        |       t        |             | |fS c c}w c c}w )Nr&  )rn   r   r   r  r   r   r   r   r  Oner<   ri   check_equals)r  r  r   r   s       r   r(  zView.resolve_negative_size  s     ;CCQAGG$$--a0CC:BCQAGG$$--a0CC>s8}% 	A{b #ggkk&}X'>h@WX		 	
%%mH&=}X?VW!! DCs
   0D0DNc                    	 | j                  |||      }|S # t        t        f$ r@ t        |      g}| j                  ||      }| j                  ||      }t	        ||      }Y |S w xY wr   )_dynamic_reshape_indexerr	  
IndexErrorri   r   )r  r  r  	dense_dimr   flatr   r   s           r   r  zView.dynamic_reshape_indexer  sz    	:228XyQG  
+ 	:!(+,D33HdCH33D(CH%h9G	:s    AA&%A&c                t   t         j                  j                  j                  }t	        t        |            D cg c]  }t        t        j                  |       c}t        t        |            }t        |       }|duxr! |t        |      dz
  k7  xr t        |      dk(  }|r&|J |j                  |      }|j                  |       g |r=|r:|j                         }	|j                         \  }
}|	dk(  r>j                  t        j                  j                         |j                  |
|f       n|dk(  r|j                  |	       n ||       ||	      k(  r=j                  |
       t         j                  j                  j!                  ||	       nh ||       ||	      k  r ||       ||	      k  r2|j                         \  }}||z  |
z   }
||z  } ||       ||	      k  r2j                  |
       t         j                  j                  j!                  ||	       n ||       ||	      kD  rt        j                  j"                  }|	}j                  t%        |
||             ||z  } ||       ||	      kD  rH|j                         }j                  t%        |
||             ||z  }|	|z  }	 ||       ||	      kD  rHt         j                  j                  j!                  ||	       nt&        |r|r:|rf|j                         }	t         j                  j                  j!                  |	d       j                  t        j                  j                         |rf|r@|j                         \  }
}t         j                  j                  j!                  |d       |r@|At        |      dk(  r3j)                          j                         }j+                  ||       nj)                          t              t        |       k(  sJ 	 	 	 	 dfd}|S c c}w )zG
        Perform a reshape entirely by modifying indexing math
        NrA   c                    t        |       t              k(  sJ t        |       t              f       t        t        |             t        fdD              S )Nc              3  6   K   | ]  }t        |        y wr   )rj   )r   r   replacementss     r   r   zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>d  s     HA|4Hr  )r   r   r   r   )r   r=  r  	view_exprs    @r   r   z.View._dynamic_reshape_indexer.<locals>.reindex_  sO     u:T*CSZT,CC*D% 01LHiHHHr   r  )rn   r   r   r   r   r   rh   r@   VIEWr   r   r  r  r   r  r  r4  r3  r?   r	  reverseinsert)r  r  r8  r   r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r  r>  s                     @@r   r6  zView._dynamic_reshape_indexer  s    GG$$..	 CHHBV
=>*499a8
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #x1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD)/C/C')3H  )Ih,??   %  --hA8$y'::''++"  gw!GH!G+)Ih,??'mmoG$$_S'7%KL%/G''1H	  )Ih,??
   --hA$$= I@  }}HGG))(A6UWW\\* 
 %MMOMCGG))(A6   S]a%7"JY
39~X...	I!	I	I [
s   !P5)r   r    r   r    r   r    )r   r   r  rE  r   r   )r  rE  r  rE  r   ztuple[list[Expr], list[Expr]]r   )r  rT  r  rT  r8  rW  r   &Callable[[Sequence[_T]], Sequence[_V]])r  rE  r  rE  r8  rW  r   r  )r   r   r   rh  rk  r  r  r   r  r(  r  r6  r   r   r   r  r  n  s    
   i5  i5V " ",:"	&" "  
 $(	$ % !	
 
0    $(X X X !X 
4	X Xr   r  c                       e Zd ZU dZded<   d fdZddZeZddZddZ	ddZ
edd	       Zdd
ZddZddZddZddZddZ ed       	 d	 	 	 dd       ZdddZd dZ xZS )!rH  z*Pretend our storage has a different layoutr  rC  c                    t         |           t        | j                  t              r0t
        j                  | d| j                  j                                y y )NrB  )r@  r  r   rB  rd  r   r~  re  rN  s    r   r  zReinterpretView.__post_init__o  s@    dii*tVTYY-B-B-DE +r   c                P    | j                  | j                  | j                  g      S r   )r  rB  rC  r  s    r   r  zReinterpretView.__str__t  s&    		
 	
r   c                6    | j                   j                         S r   r  r  s    r   rh  zReinterpretView.get_name~  r  r   c                .    | j                   j                  S r   )rC  r   r  s    r   r  zReinterpretView.get_device  s    {{!!!r   c                     y r   r   r  s    r   r  zReinterpretView.get_origin_node  r  r   c                .    | j                   j                  S r   )rC  r   r  s    r   r   zReinterpretView.dtype  s    {{   r   c                @    t        | j                  j                        S r   )r   rC  r   r  s    r   r   zReinterpretView.get_size  s    DKK$$%%r   c                @    t        | j                  j                        S r   )r   rC  r   r  s    r   r(  zReinterpretView.get_stride  s    DKK&&''r   c                     d fd}|S )Nc                T   j                   j                         }t        j                  j	                          ||             }j                   j
                  j                  j
                  k7  r5t        j                  |j
                  j                  j
                        S |S r   )rC  r  rl   loadrh  r   rB  to_dtype_bitcast)r   r  
tmp_loaderr  s      r   r  z+ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!r   r   rE  r   rm   r   r  r  s   ` r   r  zReinterpretView.make_loader  s    	" r   c                6    | j                   j                         S r   )rC  r  r  s    r   r  zReinterpretView.make_indexer      {{''))r   c                    | j                   S r   rC  r  s    r   r  zReinterpretView.get_layout  r  r   c                     y r   r   r  s    r   r	  zReinterpretView.freeze_layout  r  r   c                    t        | j                  j                  |      t        | j                  j                  |      z  t        | j                  j                  |      z  S r   )r(   rC  r   r   rF  r$  s     r   r%  z$ReinterpretView.get_free_symbol_uses  sQ    
 T[[--}=t{{11=ABt{{11=AB	
r   c                t   t         j                  j                  j                  | j                  | j
                  j                  | j
                  j                  | j
                  j                  ||j                  n#t         j                  j                  j                  | j
                  j                        S r  )rn   r   wrapper_codecodegen_reinterpret_viewrB  rC  r   r   rF  	writeliner   r  s     r   r  z!ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
r   c                     yr  r   r  s    r   r   zReinterpretView.num_reads      r   r8  rU  rK  r;  r?  rD  rO  rQ  rA  rZ  re  r   rJ  rc  )r   r   r   rh  r   r  r  r  rh  r  r  rn  r   r   r(  r  r  r  r	  rY   r%  r  r   r  r  s   @r   rH  rH  i  s    4NF

 H$" ! !&(	* -.$)
!
	!
 /

r   rH  c                  \    e Zd ZU dZded<   ed
d       ZddZeZe	dd       Z
ddZddZy	)	DtypeViewz(Pretend our storage has a different typer@  target_dtypec                    t        |      r]t        |      \  }}t        |j                  ||j                  |j
                  |j                  |j                        }t        ||      S t        ||      S )NrA  )rB  rm  )
r  rD  rE  r   r   r   rF  rG  rH  rl  )r  r   	new_dtyperK  rL  rN  s         r   r  zDtypeView.create  sm     #"7":GZ$!!!!!!$$J #
CCai88r   c                P    | j                  | j                  | j                  g      S r   )r  rB  rm  r  s    r   r  zDtypeView.__str__  s     		4+<+<=>>r   c                    | j                   S r   )rm  r  s    r   r   zDtypeView.dtype  s       r   c                6    | j                   j                         S r   rB  r   r  s    r   r   zDtypeView.get_size  r  r   c                L      j                   j                         d fd}|S )Nc                z    t        j                   |       j                  j                  j                        S r   )rl   r[  rm  rB  r   )r   r  r  s    r   r  z%DtypeView.make_loader.<locals>.loader  s*    ''c
D4E4EtyyWWr   r,  rB  r  )r  r  r  s   ` @r   r  zDtypeView.make_loader  s"    		%%'	X r   N)r   r   ro  r@  r   rd  rU  r?  rD  rO  )r   r   r   rh  r   r  r  r  r  rn  r   r   r  r   r   r   rl  rl    sE    29 9? H! !$r   rl  c                  d    e Zd Ze	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)	SliceViewc                l   	
 t         j                  j                  
|j                         |   t	        d ||fD              r!t
        j                  	t
        j                  n
j                  	
j                  d	
fd	 	 	 	 	 	 	 	 	 	 d fd} ||dd      } |||      }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  2   K   | ]  }t        |        y wr   )r2   r   r   s     r   r   z0SliceView.normalize_start_end.<locals>.<genexpr>  s     HA$Q'H   c                    j                  | |      r| n | |      }j                  ||      r|}|S  ||      }|S r   )statically_known_geqr2  )r   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr   s        r   clampz,SliceView.normalize_start_end.<locals>.clamp  s`    221e<(1eBT 
 00F  
   mU3 
  r   c                D    | |S j                  |       }  | ||      S r   )r  )r  r  r  r  r  r  dim_sizes       r   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wrap  s0     {++C:CeU++r   r   )r   r    r  r   r  r   r   r    )
r  zUnion[int, None]r  r   r  r   r  Union[Expr, int]r   r  )
rn   r   r   r   r  r   MinMaxevaluate_minevaluate_max)r  r   r8  startendr  r  r  r  r  r   s   `     @@@@@r   normalize_start_endzSliceView.normalize_start_end  s     77##::<$H%h1GHHyyHyyH,,H,,H		 	,!	,*-	,69	,DT	,	, 5!Xq1eXx8czr   c           	        t        j                        t        t              sdkD  sJ        	 dk(  r|dk\  rdk(  r|S t        |j                               |r| j                  ||      \  }t        |z
  dz
  z         <   t        |      rt        |      \  }}t        |j                        }	|	   z  |	<   t        |j                  |j                  |	|j                  |j                     z  z   |j                         }
t#        ||
      S 	 	 	 	 dfd}t%        ||      S # t        $ r Y w xY w)Nr   l    rA   rA  c                    t        |       t              k(  sJ d|  d        t        |       } |    z  z   | <   | S )Nzwrong ndim r  )r   r   )r   r8  r  r  steps    r   r   z!SliceView.create.<locals>.reindex@  sR     u:X.P+eWAhZ0PP.KEsd*U2E#JLr   r  r  )r   r  r   r    	TypeErrorr   r   r  r=   r  rD  r   rE  r   r   rF  rG  rH  rx  )r  r   r8  r  r  r  r  rK  rL  rM  rN  r   r  s     `` `      @r   r  zSliceView.create  sk    ||D!$%7471	zcY.419 

%
 00CDJE3 uq!94@ #"7":GZj//0J(o4JsO$!!  !!J$5$5c$:U$BB$$J #
CC	!		 	 ah@@K  		s   D> >	E
EN)
r   r   r8  r   r  r   r  r   r   ztuple[int, int])rA   T)r   r   r8  r   r  r   r  r   r  r   r  r   r   r   )r   r   r   r  r  r  r   r   r   rx  rx    s    '' '),'36'	' 'R  3A3A 3A 	3A
 3A 3A 3A 
3A 3Ar   rx  c                  B    e Zd ZU ded<   ded<   d
dZddZddZddZy	)BaseConstantr@  r   rN  r   c                     yNr   r   r  s    r   r   zBaseConstant.get_sizeQ  s    r   c                    | j                   S r   r  r  s    r   r  zBaseConstant.get_deviceT  r  r   c                     y r   r   r  s    r   r  zBaseConstant.get_origin_nodeW  r  r   c                    t               S r   r9   r  s    r   r  zBaseConstant.get_readsZ  r  r   NrD  rK  r;  rb  )r   r   r   r   r   r  r  r  r   r   r   r  r  L  s"    r   r  c                  D    e Zd ZU ded<   ded<   ded<   ddZddZdd	Zy
)Constantr   r  r@  r   rN  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S r   )rl   r  r  r   r   r  s    r   r  z$Constant.make_loader.<locals>.loadere  s    <<

DJJ77r   r]  r   r^  s   ` r   r  zConstant.make_loaderd  s    	8 r   c                     y r   r   r  s    r   r  zConstant.realizej  r  r   c                F    t        | j                  | j                  |      S )N)r  r   r   )r  r  r   r  s     r   r/  zConstant.constant_to_devicem  s    djj

6JJr   NrO  rH  rf  )r   r   r   r   r  r  r/  r   r   r   r  r  ^  s#    JKr   r  c                  <    e Zd ZU ded<   ded<   ded<   d
dZddZy	)IndexingConstantr   r   r@  r   rN  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S r   )rl   r  r   r   r  s    r   r  z,IndexingConstant.make_loader.<locals>.loaderx  s    >>$**djj99r   r]  r   r^  s   ` r   r  zIndexingConstant.make_loaderw  s    	: r   c                F    t        | j                  | j                  |      S )N)r   r   r   )r  r   r   r  s     r   r/  z#IndexingConstant.constant_to_device}  s    djj

6RRr   NrO  rf  )r   r   r   r   r  r/  r   r   r   r  r  q  s    JSr   r  c                L   d}d}t        t        t        ||                   D ]  \  }}|dk(  rt        j                  j
                  j                  ||      s,t        j                  j
                  j                  ||      s y|t        j                  d|      z  }||z  } yNrA   FT)	reversedr   r   rn   r   r   r3  r   r  )r   r7  expected_strideexpected_stride_maxr   ys         r   is_contiguous_strides_for_shaper    s     Os5&123 
16ww77
''""::1>QRuyyA.1
 r   c                <    t         j                  | j                  z  S r   )rB   padding_alignment_bytesitemsizer  s    r   get_align_for_dtyper    s    ))U^^;;r   c                  2    e Zd ZdZddZddZ	 d	 	 	 d	dZy)
r  zxAbstract base for Layout, MultiOutputLayout, NoneLayout.
    Represents the memory layout of the output of an Operation.c                >    t        t        |       j                        r   r  r  s    r   r  zOutputSpec.get_device  r  r   c                >    t        t        |       j                        r   r  r  s    r   storage_sizezOutputSpec.storage_size  r  r   c                >    t        t        |       j                        r   r  r$  s     r   r%  zOutputSpec.get_free_symbol_uses  r  r   NrK  rc  rZ  re  )r   r   r   rh  r  r  r%  r   r   r   r  r    s,    C77 %*7!7	!7r   r  c                     e Zd ZdZd ed      df	 	 	 	 	 	 	 	 	 	 	 	 	 ddZedd       Zej                  dd       Zedd       Z	e	j                  dd	       Z	edd
       Z
e
j                  d d       Z
d!dZeZd"dZd#dZd$dZe	 	 	 	 	 	 d%d       Zd$dZd&dZd$dZe	 	 	 	 	 	 	 	 d'd       Zd(dZd$dZd)dZd*dZd+dZddZ ed       	 d,	 	 	 d-d       Zy).r  zo
    Layout base class

    Carries tensor meta-information including offset and
    whether it is pinned.
    Nr   Fc                P   |t         j                  |      }|| _        || _        t	        |      t	        |      k(  sJ d| d|        t        d |D              sJ || _        || _        || _        || _	        | j                  r!| j                  j                  dk(  sJ d       y y )Nr  	, stride=c              3  H   K   | ]  }t        |t        t        f        y wr   )r   r    r   r   s     r   r   z"Layout.__init__.<locals>.<genexpr>  s     <!:a$-<    "r  zOnly CPU tensors can be pinned)r  r  r   r   r   r   _size_stride_offsetrG  r   )r  r   r   r   r   rF  rG  s          r   r  zLayout.__init__  s     >#66t<F
4yCK'H5ix)HH'<t<<<<
"NN(8(8E(A 	
,	
B(ANr   c                    | j                   S r   r  r  s    r   r   zLayout.size      zzr   c                    || _         y r   r  r  r  s     r   r   zLayout.size  s	    
r   c                    | j                   S r   r  r  s    r   r   zLayout.stride      ||r   c                    || _         y r   r  r  s     r   r   zLayout.stride  	    r   c                    | j                   S r   r  r  s    r   rF  zLayout.offset  r  r   c                    || _         y r   r  r  s     r   rF  zLayout.offset  r  r   c                   d}| j                   dk7  rd| j                    }| j                  j                  dnd| j                  j                   }d}| j                  rd| j                   }t	        |       j
                   d| j                  j                   | d| j                   d| j                   d	| j                   | | d
S )Nr  r   z	, offset=:z, is_pinned=z('z', z, size=r  r  )	rF  r   r   rG  r   r   r   r   r   )r  rF  device_index_stris_pinned_strs       r   r  zLayout.__str__  s    ;;! .F!%!2!2!:2!DKKDUDUCV@W>>*4>>*:;MDz""#2dkk&6&6%78H7ITZZL YII;i}VH]O1N	
r   c                    | j                   S r   r  r  s    r   r  zLayout.get_device  r  r   c                
   t         j                  5  t        j                  t	        | j
                        t	        | j                        | j                  | j                  | j                        cd d d        S # 1 sw Y   y xY w)N)r   r   
pin_memory)
rn   	fake_moder  r	  r\   r   r   r   r   rG  r  s    r   get_examplezLayout.get_example  sY    [[ 	&&'		2'4jj{{>>	 	 	s   AA99Bc                B    t        | j                  | j                        S r   )r  r   r   r  s    r   r  zLayout.is_contiguous   s    .t{{DIIFFr   c                    t        |       }|dvs| d   dk(  ryt        |t        |       |       D ]  \  }}}|dk7  s||k7  s y y)N)r      rA   FT)r   r   r-   )r7  rJ  ndimleftrightr   s         r   is_channels_last_contiguousz"Layout.is_channels_last_contiguous  sb     5zvqQ!$*51"
 	D% qyTU]	 r   c                    t        | j                  t        t        j	                  t        t        | j                                          | j                        D ]  \  }}}|dk7  s||k7  s y yr  )r   r   r  r  r  r   r   )r  r  r  r   s       r   is_transposedzLayout.is_transposed  sc    !$KK^66tHTYY<O7PQRII"
 	D%
 qyTU]	 r   c                   t        | j                        t        |      k(  sJ t        | j                        D cg c]5  \  }}t        j
                  j                  j                  |d      dk7  r|7 }}}|D cg c]  }| j                  |    }}|D cg c]  }||   	 }}d	d} ||      }dgt        |      z  }t        t        |            D ]  }||   |||   <    t        t        |      dz
        D ][  }||   ||dz      kD  }t        |t              s7t        j
                  j                  j                  ||   ||dz      kD  d      }|s[ y yc c}}w c c}w c c}w )
Nr   r  rA   c                `    t        |       }| D cg c]  }|j                  |       c}S c c}w r   )rf  r   )arr
sorted_arrelements      r   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indices,  s*    J=@A'J$$W-AAAs   +r&  Tr%  F)r  r\  r   r\  )r   r   r   r   rn   r   r   r  r   r   r   
_shape_envr  )	r  r   r   r8  non_1_indicesr   r  stride_orderedr,  s	            r   r  zLayout.is_stride_ordered  s~   4;;3u:---
 $DII.
3ww11#1BaG 
 
 +88Q$++a.882?@Qa@@	B
 u% E
*s5z" 	1A'-ayN58$	1 s5zA~& 	A!!$~a!e'<<DdD)ww))77"1%q1u(==d 8  	 ;
 9@s   :E=E"E'c                    dgt        t        t        dt        | j                        dz
                    z   }t        |      g|z   }| j                  |      S Nr   rA   )r   r  r   r   r   r  r  s     r   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_orderedB  sN    d8E!S-=-A$BCDDUu$%%e,,r   c                Z   t        |      }t        |       dk(  r| S t        j                  st        j                  ||       r| S t        j                         }t        |d      r|j                  j                  dd      r| S t        d t        j                  | |      D               }t        j                  s|r| S t        t        j                  d      rt        j                  j                   nddfdrt#        fd	| D              r| S t%        |       }t'        |      }t)        t        |             D cg c]  }d }	}d
|	|d   <   d}
t+        |d
d d
      D ]  \  }}||d
z
     }|	|   ||   z  }t-        |t.        t0        j2                  f      xr |t        j4                  kD  xr ||z  dk7  xs, t-        |t0        j6                        xr t        j                  }||	|<   |st9        ||      |z  |	|<   d}
 |
s| S t:        xj<                  d
z  c_        |	S c c}w )z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   rW  dislike_paddingFc              3  \   K   | ]$  }t        |t        t        j                  f       & y wr   r   r   s     r   r   z&Layout._pad_strides.<locals>.<genexpr>a  s(      
 q3./
r   r  Nc                |    yt        | t        j                        syt        fd| j                  D              S )NFc              3  @   K   | ]  }j                  |        y wr   )is_unbacked_symint)r   r   r   s     r   r   zILayout._pad_strides.<locals>.contains_unbacked_symints.<locals>.<genexpr>o  s     R1y33A6Rs   )r   r   r    r  r1   )r,  r   s    r   contains_unbacked_symintsz6Layout._pad_strides.<locals>.contains_unbacked_symintsj  s4     dEJJ/R@Q@QRRRr   c              3  .   K   | ]  } |        y wr   r   )r   r   r  s     r   r   z&Layout._pad_strides.<locals>.<genexpr>r  s     Na6q9Nr3  rA   )r  T)r,  zsympy.Expr | intr   r   )r  r   rB   pad_channels_lastr  r  rn   get_current_noder  rW  r  r   r  chainpad_dynamic_shapesr   r  r  r   r   r   r   r   r   r   r!   padding_stride_thresholdr    rZ   r&   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noderd   r  r   r   new_stridespaddedrankr   prev_idxr   require_paddingr  r   s                   @@r   _pad_strideszLayout._pad_stridesH  s0    $E*z?a''F,N,N*-
 ,,.?F+0D0D0H0Hu1
   
__Z6
 
 

 ((Z*1!''<*HAGG&&d		S N:NN'
I>,\:
"'J"89Qq99 &'JqM"":ab>; 	ID#!$(+H *T(^;F 6C#78 (V<<<(UNa'P VUZZ0NV5N5N	 
  &K#*65#9E#AC 	   	))Q.); :s    	H(c                    t        | t              sJ t        |              | j                  J | j	                  | j                  | j
                  | j                        | _        y r   )r   r  r   r   r  r   r   r  s    r   r  zLayout.pad_strides  sM    $/;d;/{{&&&''TYY

Kr   c                F    t         j                  xr t        | t              S r   )rB   comprehensive_paddingr   r  r  s    r   r  zLayout.should_pad_strides  s    ++P
40PPr   c                    t        | t              r| S | j                         r| j                          t        | j                  | j
                  | j                  | j                  | j                  | j                        S r   )
r   rE  r  r  r   r   r   r   rF  rG  r  s    r   as_fixedzLayout.as_fixed  s`    dK(K""$KKJJIIKKKKNN
 	
r   c                    t         j                  sJ dt        |       j                   d       | j	                         j                         S )Nzconvert z to FixedLayout first)r  r  r   r   r  r  r  s    r   r  zLayout.make_indexer  sG    ,, 	
tDz**++@A	
, }}++--r   c                f   t        |t              xr | j                  |j                  k(  xr | j                  |j                  k(  xrj | j                  |j                  k(  xrO | j
                  |j
                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S r   )r   r  r   r   r   r   rF  rG  )r  others     r   __eq__zLayout.__eq__  s    uf% 2u||+2

ekk)2 		UZZ'2 u||+	2
 u||+2 %//1	
r   c                X    t        | j                  | j                  | j                        S r   )r*   r   r   rF  r  s    r   r  zLayout.storage_size  s    .tyy$++t{{SSr   c                    t        | j                  |      t        | j                  |      z  t        | j                  |      z  S r   )r(   r   r   rF  r$  s     r   r%  zLayout.get_free_symbol_uses  s=    
 TYY6t{{M:;t{{M:;	
r   )r   rN  r   r@  r   rE  r   zOptional[Sequence[Expr]]rF  r    rG  r   r   r   rD  r  rE  r   r   rG  r  r    r   r   rU  rM  )r   torch.TensorrC  )r7  rT  rJ  rT  r   r   )r   r\  r   r   )r  r\  r   rE  r   r@  r   r\  r8  r   rE  rQ  )r	  r   r   r   rZ  re  )r   r   r   rh  r!   r  rn  r   setterr   rF  r  r  r  r  r  rk  r  r  r  r  r  r  r  r  r  r
  r  rY   r%  r   r   r   r  r    s    ,0qz

 
 	

 )
 
 
 

2   
[[    ]]    ]] 
 HG !,>	 "!F- L!L)7L@KL	L L\L
Q
.	
T H%$)
!
	!
 &
r   r  c                      e Zd ZdZddZy)rE  z A Tensor layout we cannot changec                X    t        | j                  | j                  | j                        S )r  )r  r   r   rF  r  s    r   r  zFixedLayout.make_indexer  s    diidkkBBr   NrQ  )r   r   r   rh  r  r   r   r   rE  rE    s    *Cr   rE  c                      e Zd ZdZdZddZedd       Zedd       Zedd       Z	e	 	 	 	 	 	 dd       Z
e	 	 	 	 	 	 dd       Zedd	       Zej                  dd
       Zedd       Zej                  dd       Zedd       Zej                  dd       Z	 d 	 	 	 	 	 d!dZ	 d 	 	 	 	 	 d"dZd#dZd$dZd%dZd&dZ	 	 d'	 	 	 	 	 	 	 	 	 	 	 d( fdZ xZS ))r  z|
    A Tensor layout that we are allowed to change

    Assumption: layout change should NOT add or remove free symbols
    Fc                H    t        j                  |       j                         S )z
        Compute what the strides would be if this layout were frozen,
        without actually modifying the layout. This is used for speculative
        stride computation during Triton template code generation.
        )r1  deepcopyr  r  s    r   !get_fixed_layout_without_freezingz0FlexibleLayout.get_fixed_layout_without_freezing  s     }}T"++--r   c                    t        |       dk(  rg S t        j                  j                  g}t	        | dd        D ]  }|j                  ||d   z          t        t	        |            S )Nr   rA   r&  )r   r   r  r3  r  r  r   )sizesreversed_stridesr   s      r   r  z!FlexibleLayout.contiguous_strides  sh    u:?I!GGKK=U12Y' 	AD##D+;B+?$?@	AH-.//r   c                    t        t        t        |                   t        |      k(  s	J | |f       t        j                  j
                  }dgt        |      z  }|D ]  }|||<   || |   z  } |S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        N)r:   r   r   r   r  r3  )r  r   next_striderJ  r   s        r   fill_orderedzFlexibleLayout.fill_ordered  sx     %E
+,
50AAQE5>QAggkk&3u:% 	1A$GAJ%a0K	1 r   c                    t        t        t        |                   t        |      k(  sJ t        |      }t        j                  | |      S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r:   r   r   r   r  r  )r  r   r   s      r   r  zFlexibleLayout.stride_ordered  sB     %E
+,
50AAAA,U3
**5*==r   c                >   |t         j                  k(  rt        j                  | t              S |t         j
                  k(  rt        j                  | t              S |t         j                  k(  rt        j                  |       S t        j                  d|       t        )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r  channels_lastr  r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr  ry  r  r  )r  memory_formats     r    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_format  s     E///!008IJJe444!008JKKe555!44U;;IIP &%r   c                (   t        |       t        |      k(  sJ |D cg c]+  }t        j                  j                  j	                  |      - }}t        t        t        |            |j                        }t        j                  | |      S c c}w )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        r`  )
r   rn   r   r   r  rf  r   __getitem__r  r  )r  r   r   r   s       r   same_orderedzFlexibleLayout.same_ordered)  sv     5zS[(((BHIQ!''""55a8IIE#f+.F4F4FG
**5*== Js   0Bc                    | j                   S r   r  r  s    r   r   zFlexibleLayout.size8  r  r   c                6    | j                  d|       || _        y )Nr   )!assert_free_symbol_uses_unchangedr  r  s     r   r   zFlexibleLayout.size<  s    ..vu=
r   c                    | j                   S r   r  r  s    r   r   zFlexibleLayout.strideA  r  r   c                6    | j                  d|       || _        y )Nr   )r,  r  r  s     r   r   zFlexibleLayout.strideE      ..x?r   c                    | j                   S r   r  r  s    r   rF  zFlexibleLayout.offsetJ  r  r   c                6    | j                  d|       || _        y )NrF  )r,  r  r  s     r   rF  zFlexibleLayout.offsetN  r/  r   c                0   | j                  | j                  |      }| j                         r)|r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                  | j                        S r   )	r  r   r  r  r   rE  r   rF  rG  )r  r   r  rM  s       r   as_stride_orderzFlexibleLayout.as_stride_orderS  sw     ((E:
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                    |}| j                         r)|r'| j                  || j                  | j                        }t	        | j
                  | j                  | j                  || j                  | j                        S r   )r  r  r   r   rE  r   rF  rG  )r  r  r  rM  s       r   as_exact_strideszFlexibleLayout.as_exact_stridesc  sf     #
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                ,   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                  | j                        S r   )	r  r   r  r  r   rE  r   rF  rG  )r  r   rM  s      r   as_fill_orderzFlexibleLayout.as_fill_orders  ss    $($5$5dii$G
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                ,   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                  | j                        S r   )	r)  r   r  r  r   rE  r   rF  rG  )r  r   rM  s      r   as_same_orderzFlexibleLayout.as_same_order  ss    &&tyy&9
""$**:tyy$**MJKKJJIIKKNN
 	
r   c           
     r    i }dD ]/  }dD ](  }||f}t        t        t        | |      |            ||<   * 1 |S )N)r   r   rF  TF)r:   r(   r   )r  initial_free_symbolsr   r^  ra  s        r   get_initial_free_symbol_usesz+FlexibleLayout.get_initial_free_symbol_uses  sY    !0 	D!. ]+,6$WT4%8-H-$S)	 $#r   c                    dD ]:  }| j                   ||f   }t        t        ||            }||k(  r/J d| d|         y )Nr;  z)Expected free symbols unchanged, but got z vs )r<  r:   r(   )r  r   r  r^  old_free_symbolsnew_free_symbolss         r   r,  z0FlexibleLayout.assert_free_symbol_uses_unchanged  sd    * 	M#88$9NO)*:5-*PQ#'77 ;<L;MTRbQcd7	r   c                    |rt         j                  ||      }nt         j                  |      }t        |   |||||       | j                         | _        y )NrG  )r  r  r  r@  r  r=  r<  )r  r   r   r   r  rG  rJ  rB  s          r   r  zFlexibleLayout.__init__  sU     $11$EG$77=GgK %)$E$E$G!r   r  )r  r\  r   r	  )r  r\  r   r\  r   r	  )r  r\  r   r\  r   rE  )r  r\  r%  ztorch.memory_formatr   rE  )r  r\  r   rT  r   rE  rD  r  rG  r  rZ  )r   r\  r  r   r   rE  )r  rT  r  r   r   rE  )r   r\  r   rE  )r   rT  r   rE  )r   z$dict[tuple[str, bool], sympy.Symbol])r   r   r  r3   r   r   r  )r   rN  r   r@  r   rE  r  'Optional[Sequence[Union[int, Integer]]]rG  r   r   r   )r   r   r   rh  r  r  rk  r  r  r  r&  r)  rn  r   r  r   rF  r3  r5  r7  r9  r=  r,  r  r  r  s   @r   r  r    s    N. 0 0    	> 	> &&-@&	& &4 >>&8>	> >   
[[    ]]    ]] 
 ;@
"
37
	
" HM
/
@D
	
 

	$ AEHH H 	H
 >H H 
H Hr   r  c                  \     e Zd ZdZd fdZddZddZ ed       	 d		 	 	 d
d       Z xZ	S )NonOwningLayoutz,Is a view into the storage of another tensorc                    |j                         }t        | 	  |j                  |j                  |j
                  |j                         || _        y r   )r  r@  r  r   r   r   r   view)r  rG  rC  rB  s      r   r  zNonOwningLayout.__init__  sA    "MMLLKKMM		
 	r   c                >    | j                         j                         S r   )r  r  r  s    r   r  zNonOwningLayout.make_indexer  s    }}++--r   c                    | j                   j                         j                  }|dk(  ryddlm} t
        j                  j                  j                  ||      S )Nr   TrA   )	ALIGNMENT)	rG  r  rF  utilsrJ  rn   r   r   r  )r  rF  rJ  s      r   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned  sD    %%'..Q;$ww<<VYOOr   c                4   t        | j                  t              sJ | j                  j                  }t        |t              sJ t        |             |j                  }t        |t              sJ t        |             |j                  j                  |      S r   )	r   rG  rH  rB  rf  r   rg  rC  r%  )r  r^  boxinput_buffers       r   r%  z$NonOwningLayout.get_free_symbol_uses  sw     $))_555iinn#z*5DI5*xx,/:c:/""77FFr   )rG  zUnion[BaseView, TensorBox]r   r   rQ  rC  rZ  re  )
r   r   r   rh  r  r  rL  rY   r%  r  r  s   @r   rE  rE    sG    6.P -.$)G!G	!G /Gr   rE  c                      e Zd ZdZy)CommBufferTypesymm_memN)r   r   r   SYMM_MEMr   r   r   rQ  rQ    s    Hr   rQ  c                  F     e Zd ZU dZded<   ded<   	 	 	 	 	 	 d fdZ xZS )CommBufferLayoutax  
    A layout that signifies the buffer is a comm buffer.
    In terms of striding, the layout is identical to `FixedLayout`.

    Buffers with this layout do not participate in in-place reuse - it can be
    neither the source nor the target for in-place reuse.

    For detailed motivation and usage of this layout, see
    NOTE [lowering-time collective optimization].
    rQ  comm_buffer_typer   
group_namec                   t        |t              r|j                         n|}t        |   |j
                  |j                  |j                  |j                  |j                  |j                         || _        || _        y )Nr   r   r   r   rF  rG  )r   r  r  r@  r  r   r   r   r   rF  rG  rV  rW  )r  rC  rV  rW  fixedrB  s        r   r  zCommBufferLayout.__init__  si     &0%G!V<<++<<<<oo 	 	
 !1$r   )rC  z"Union[FlexibleLayout, FixedLayout]rV  rQ  rW  r   )r   r   r   rh  r   r  r  r  s   @r   rU  rU    s;    	 %$O%2% )% 	% %r   rU  c                      e Zd ZU ded<    ej
                  d       Zded<    ej
                  d       Zded<   dd	Zdd
Z	ddZ
y)
NoneLayoutrL  r   c                     dgS r  r   r   r   r   r_  zNoneLayout.<lambda>  s     r   default_factoryr  r   c                     dgS r  r   r   r   r   r_  zNoneLayout.<lambda>	  s    1# r   r   c                     yr  r   r  s    r   r  zNoneLayout.storage_size  rj  r   c                    | S r   r   r  s    r   r  zNoneLayout.as_fixed      r   c                    | j                   S r   r  r  s    r   r  zNoneLayout.get_device  r  r   Nrc  rB  rK  )r   r   r   r   ri  rj  r   r   r  r  r  r   r   r   r\  r\    sG     #"'k''DD)D)))+FFIFr   r\  c                       e Zd Zd
 fdZedd       Zej                  dd       ZddZddZddZ	e
	 d	 	 	 	 	 	 	 dd       ZddZdd	Z xZS )MutationLayoutSHOULDREMOVEc                   t         |   |j                         |j                         |j	                         d        || _        | j                         j                         }t        j                  j                  |       y r   )r@  r  r  r  r   r  
get_bufferrh  rn   r   mark_buffer_mutated)r  r  r   rB  s      r   r  z#MutationLayoutSHOULDREMOVE.__init__  se    &&(OO		
  ))+	##D)r   c                6    | j                         j                  S r   )real_layoutr   r  s    r   r   z!MutationLayoutSHOULDREMOVE.stride!  s    !(((r   c                     y r   r   r  s     r   r   z!MutationLayoutSHOULDREMOVE.stride%  s    r   c                >    | j                         j                         S r   )rk  r  r  s    r   r  z'MutationLayoutSHOULDREMOVE.storage_size)  s    !..00r   c                x    dfd | j                         }t        |t              sJ t        |             |S )Nc                    t        | t              r | j                        S t        | t              r | j	                               S t        | t
              r | j                        S | S r   )r   rf  r  rd  re  
MutableBoxrB  )r  unwrap_viewss    r   rq  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views-  sY    &"<=#FMM22&(+#F$6$6$899&*-#FKK00Mr   )r  r   r   r   )r  r   rg  r   )r  r  rq  s     @r   rh  z%MutationLayoutSHOULDREMOVE.get_buffer,  s6    	 dkk*&&)74<7)r   c                ^    | j                         j                  }t        |t              sJ |S r   )rh  rC  r   r  )r  rC  s     r   rk  z&MutationLayoutSHOULDREMOVE.real_layout:  s)    "))&&)))r   c                   |j                          t        j                  j                  |j	                                t        |t              r|j                  }|j                          |st        j                  |j                         |j                         |j                         t        |j                         |j                               D cg c]/  \  }}t        j                  j                   j#                  ||      1 c}}      }t        |t$        t&        f      sJ |j                  }|j                          t)        |d      sJ |       t        |j                  j*                  t,              s$J t/        |j                  j*                               t1        |      |j                  _        |j                  S c c}}w )Nr  rB  )r  rn   r   ri  rh  r   r   rB  r  r  r  r  r  r  r   r   r   check_equals_and_simplifyrd  rp  r  rC  r  r   rf  )r  srcdstunsafe_aliasr  r  r   s          r   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into?  sI    	 	
##CLLN3c9%((C 	##~~'mmo* !$CLLNCLLN C1 GG$$>>q!D	 $ D dXz$:;;;))CsF#(S(##((//>:QD<QQ:4S9xxs   4Gc                    | S r   r   r  s    r   r  z#MutationLayoutSHOULDREMOVE.as_fixedg  rc  r   c                6    | j                   j                         S r   )r  r  r  s    r   r  z'MutationLayoutSHOULDREMOVE.make_indexerj  r`  r   )r  r   r   r   rD  )r  r   r   r   rG  )r   rg  rA  rZ  )ru  r   rv  r   rw  r   r   r   )r   r   rQ  )r   r   r   r  rn  r   r  r  rh  rk  r  rx  r  r  r  r  s   @r   rf  rf    s    	* ) ) ]] 1
 <A%%%%59%	% %N*r   rf  c                  f    e Zd ZU ded<   ded<   d# fdZd$dZd%dZd&dZd'd	Zd(d
Z	e
d)d       Zd*dZd+dZd,dZd-dZd.dZd/dZd0dZd#dZ	 d1	 	 	 	 	 d2dZd3dZd4dZ	 d1	 	 	 	 	 d5dZd0dZd6dZd7d8dZd#dZd9dZd9dZd:dZ ed       	 d1	 	 	 d;d       Z d<d Z!d=d!Z"d0d"Z# xZ$S )>rg  rI  r   r  rC  c                F    t         |           | j                  dd        y rx  )r@  r  r  rN  s    r   r  zBuffer.__post_init__x  s    t4r   c                >    | j                         j                         S r   )r  r  r  s    r   r  zBuffer.make_indexer|  s     --//r   c                @    | j                   sJ |        | j                   S r   r  r  s    r   rh  zBuffer.get_name  s    yy$yyyr   c                    t        | j                  t              r| j                  j                         S t	        t        | j                        j                        r   )r   rC  r  r  r  r   r   r  s    r   r  zBuffer.get_example  s=    dkk6*;;**,,!$t{{"3"<"<==r   c                >    | j                         j                         S r   )r  r  r  s    r   r  zBuffer.get_device  s    ##%0022r   c                     y r   r   r  s    r   r  zBuffer.get_defining_op  r  r   c                6    | j                         j                  S r   )r  r   r  s    r   r   zBuffer.dtype  s     &&&r   c                :    g | j                         j                  S r   )r  r   r  s    r   r   zBuffer.get_size  s    ("''((r   c                :    g | j                         j                  S r   )r  r   r  s    r   r(  zBuffer.get_stride  s    *"))**r   c                6    | j                         j                  S r   )r  rF  r  s    r   
get_offsetzBuffer.get_offset  s     '''r   c                    t        | j                  t              r| j                  S t        t	        | j                        j
                        r   )r   rC  r  r  r   r   r  s    r   r  zBuffer.get_layout  s4    dkk6*;;!$t{{"3"<"<==r   c                    | j                   S r   rb  r  s    r   r  zBuffer.get_output_spec  r  r   c                "    | j                         S r   )r  r  s    r   r"  zBuffer.get_storage_numel  s    ~~r   c                6    | j                         j                  S r   )r  rG  r  s    r   get_is_pinnedzBuffer.get_is_pinned  s     ***r   c                    t        | j                  t              r;t        | j                  t              s | j                  j	                         | _        y y y r   )r   rC  r  rE  r  r  s    r   r	  zBuffer.freeze_layout  s>    dkk6*:KK4
 ++..0DK4
*r   c                    t        | j                  t              sJ t        | j                               | j                  j	                  ||      | _        y Nr  )r   rC  r  r   r3  r  s      r   r  z&Buffer.freeze_layout_with_stride_order  sA     $++~6IT[[8II6kk11%}1Ur   c                    t        | j                  t              sJ t        | j                               | j                  j	                  |      | _        y r   )r   rC  r  r   r7  r  s     r   r  z$Buffer.freeze_layout_with_fill_order  s:    $++~6IT[[8II6kk//6r   c                    t        | j                  t              sJ t        | j                               | j                  j	                  |      | _        y r   )r   rC  r  r   r9  r  s     r   r  z$Buffer.freeze_layout_with_same_order  s:    $++~6IT[[8II6kk//7r   c                    t        | j                  t              sJ t        | j                               | j                  j	                  ||      | _        y r  )r   rC  r  r   r5  r  s      r   r  z'Buffer.freeze_layout_with_exact_strides  sF     $++~6IT[[8II6kk22 3 
r   c                    t         j                  j                  j                  t	        j
                  | j                         d            S r  r  r  s    r   r  zBuffer.is_zero_elements  r  r   c                r      j                         rt        t         j                               S d fd}|S )Nr  c                x    j                         }t        j                  j                  xs d ||             S r  )r  rl   rZ  r   r   r  r  s     r   r  z"Buffer.make_loader.<locals>.loader  s/    '')G88DII2GENCCr   r]  )r  r   r  r  r^  s   ` r   r  zBuffer.make_loader  s0      "=0@AA	D r   c                "    | j                         S r   rh  r  s     r   r  zBuffer.codegen_reference  r  r   c                     y r   r   r  s    r   rh  zBuffer.decide_layout  r  r   c                    t        | j                  t              r%| j                  j                  j	                         gS yr  )r   rC  rE  rG  rh  r  s    r   r5  z#Buffer.get_inputs_that_alias_output  s/    dkk?3KK$$--/00r   c                    t        | j                  t              r%| j                  j                  j	                         gS yr  )r   rC  rf  r  rh  r  s    r   r1  zBuffer.get_mutation_names  s0    dkk#=>KK&&//122r   c                6    t        | j                         g      S r   )r:   rh  r  s    r   r  zBuffer.get_read_names  s    4==?+,,r   c                    t               S r   r9   r$  s     r   r%  zBuffer.get_free_symbol_uses       |r   c                    t               S r   r9   r  s    r   r  zBuffer.get_unbacked_symbol_defs  r  r   c                     y r   r   r  s    r   r  zBuffer.realize  r  r   c                     yr  r   r  s    r   should_allocatezBuffer.should_allocate  s    r   r8  rQ  rU  )r   z!Union[torch.Tensor, torch.SymInt]rK  r<  r?  rD  )r   r	  rG  rA  rB  rc  rC  rZ  r[  r]  )r   r\  r   r   )r  r\  r  r   r   r   rO  r   rJ  rg  r9  re  r  rH  )%r   r   r   r   r  r  rh  r  r  r  rn  r   r   r(  r  r  r  r"  r  r	  r  r  r  r  r  r  r  rh  r5  r1  r  rY   r%  r  r  r  r  r  s   @r   rg  rg  n  s    
50>
3 ' ')+(>
 +1 ;@V"V37V	V78
 CH
*
;?
	
U	

- H%$)!	! &
r   rg  c                  <    e Zd ZddZddZej                  ZddZy)OperationBufferc                    | gS r   r   r  s    r   r  zOperationBuffer.get_outputs  s	    vr   c                    | S r   r   r  s    r   r  zOperationBuffer.get_defining_op  rc  r   c                X    t         j                  |        t        j                  |        y r   )rg  r  rp  r  s    r   r  zOperationBuffer.__post_init__  s    T"%r   Nr  r   rp  r8  )r   r   r   r  r  rp  r3  r  r   r   r   r  r    s     #55&r   r  c                      e Zd ZddZy)rz  c                     yr  r   r  s    r   r   zInputBuffer.num_reads	  rj  r   Nrc  )r   r   r   r   r   r   r   rz  rz    s    r   rz  c                      e Zd ZdZy)DonatedBufferaY  
    Represents a donated buffer which is a saved tensor that is not alias to any
    fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
    reuse the input tensor memory during backward since it might be used in another
    function. However, donated buffer can be inplace reused during backward
    to save memory.
    N)r   r   r   rh  r   r   r   r  r    s    r   r  c                  ,    e Zd ZU dZded<   ddZddZy)r  NrL  r  c                     d fd}|S )Nc                    j                         j                         }t        j                  t        j
                  j                  j                         j                         ||             S r   )	r  r  rl   rZ  rn   r   constant_namerh  r  r  s     r   r  z*ConstantBuffer.make_loader.<locals>.loader  sP    oo'446G88%%dmmot7K7KL r   r]  r   r^  s   ` r   r  zConstantBuffer.make_loader  s    	 r   c                    t        t        j                  j                  | j	                         |      | j
                        S N)r   rC  )r  rn   r   r  rh  rC  r  s     r   r/  z!ConstantBuffer.constant_to_device$  s/    &&t}}?
 	
r   rO  rf  )r   r   r   r  r   r  r/  r   r   r   r  r    s    .2O+2
r   r  c                  V    e Zd ZddZ ed       	 d	 	 	 d	d       Zd
ddZddZddZy)NoneAsConstantBufferc                    t               S r   r9   r  s    r   r  zNoneAsConstantBuffer.get_reads,  r  r   c                    t               S r   r9   r$  s     r   r%  z)NoneAsConstantBuffer.get_free_symbol_uses/  r  r   Nc                J    t         j                  j                  j                  S r   )rn   r   rf  none_strr  s     r   r  z&NoneAsConstantBuffer.codegen_reference5  s    ww##,,,r   c                    t        d       S Nr  )r\  r  s    r   r  z$NoneAsConstantBuffer.get_output_spec8  s    &&r   c                     yr  r   r  s    r   r  z&NoneAsConstantBuffer.has_tensor_output;  r  r   rb  rZ  re  r   rJ  rB  rC  )	r   r   r   r  rY   r%  r  r  r  r   r   r   r  r  *  sC     23$)!	! 4
-'r   r  c                  R    e Zd ZU ded<    ed       	 d	 	 	 dd       Zd	d
dZddZy)r   r    r,  c                .    t        | j                  |      S r   )r(   r,  r$  s     r   r%  z*ShapeAsConstantBuffer.get_free_symbol_usesC  s      		=99r   Nc                h    t         j                  j                  j                  | j                        S r   )rn   r   rf  codegen_sizevarr,  r  s     r   r  z'ShapeAsConstantBuffer.codegen_referenceI  s!    ww##33DII>>r   c                     yr  r   r  s    r   r  z'ShapeAsConstantBuffer.has_tensor_outputL  r  r   rZ  re  r   rJ  rC  )r   r   r   r   rY   r%  r  r  r   r   r   r   r   ?  s<    
J34$):!:	!: 5:
?r   r   c                      e Zd ZU dZded<   dZded<   dZded	<   dZd
ed<   dZded<   dZ	ded<   e
j                  d%d       Zee
j                  d%d              Zd&dZd'dZd(dZd)dZd*dZ ed       	 d+	 	 	 d,d       Zd- fdZd.dZd/dZd0dZd1dZe	 	 d2d       Z	 	 d3	 	 	 	 	 d4dZe	 d5	 	 	 	 	 	 	 	 	 	 	 d6d       Zd7dZd7d Z d&d!Z!d.d"Z"d.d#Z#d8d$Z$ xZ%S )9rx  zb
    Represents a buffer that is computed during kernel execution rather than being an input.
    r  rB  FzClassVar[bool]_force_realizeNrW  r  Optional[Callable[..., Any]]r  rF  r  r  c           
   #    K   | j                   J | j                  J | j                  J | j                  J t	        | j
                  t              sJ t        | j
                                | j
                  }| j                  }	 t        |j                  |j                  | j                  | j                  | j                  |j                  |j                  |j                        }|| _        t        |j                  |j                  | j                        | _        | j                  j!                  |        d  || _        || _        y # || _        || _        w xY ww)NrQ  )r  r  r  r  r   rB  r9  r   rC  r   r   r&  r;  r<  rE  get_default_sizes_bodyclear_cache)r  old_datarL  new_datas       r   with_original_inner_fnz%ComputedBuffer.with_original_inner_fn_  s=    +++&&222$$000..:::$))Y/EDO3DE/99[[
	% nn00,,!%!@!@'66",,'66	H !DI &%%DK
 ''33D9 DI$DK !DI$DKs   BE$	B9E E$E!!E$c               #     K   t         j                  } 	 dt         _        d  | t         _        y # | t         _        w xY wwNT)rx  r  )	old_values    r   force_realizezComputedBuffer.force_realize  s3      #11		6,0N),5N)IN)s   ?/ ?<?c                    | j                   | j                   S t        | j                  d      r| j                  j                   S y)z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nr   )r   r  rB  r  s    r   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_name  s7    
 99 99499f%99>>!r   c                6    | j                   j                         S r   rB  r   r  s    r   r   zComputedBuffer.num_reads  r  r   c                6    | j                   j                         S r   rB  r  r  s    r   r  zComputedBuffer.get_reads  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zComputedBuffer.get_read_names  r  r   c                X   t        | j                  t        t        t        t
        f      s0t        j                  t               t               t                     S t        j                  t        dd      5  | j                  j                         rTt        | j                         | j                  j                         | j                  j!                               cd d d        S t        | j                         | j                  j#                               cd d d        S # 1 sw Y   y xY w)Nr  writesindex_exprsr  T)r   rB  r9  rB  r{  r  rC   
ReadWritesr:   r   r   r  r'  rL   get_store_functionr  r)  r   r  s    r   r  zComputedBuffer.get_read_writes  s    $))itY%GH** l!|&L  \\.*:DA 	yy++-*++-II002II002	 	 +++-II&&(	 	 	s   6A%D %1D  D)c                    | j                   j                  |      | j                  j                  |      z  }| j                         r"|| j	                         j                  |      z  }|S r   )rC  r%  rB  has_store_functionr  )r  r^  r  s      r   r%  z#ComputedBuffer.get_free_symbol_uses  sc    $ 11
II**=9: ""$d**,AA-PPFr   c                    | j                         s_| j                  t        j                  j                  vr9| j                         dk(  r&| j                  s| j                  j                         S t        | !         S r  )
r'  r   rn   r   mutated_buffersr   r  rB  r  r@  rN  s    r   r  zComputedBuffer.make_loader  s`    '')		!8!88 A%'' 99((**w"$$r   c                V    t        | j                  t        t        t        t
        f      S r   )r   rB  r9  rB  r{  r  r  s    r   r  z!ComputedBuffer.has_store_function  s    $))itY%GHHr   c                   | j                         j                         j                         }t        | j                  t
        t        t        f      r+t        | j                  j                  | j                  |      S t        | j                  t              sJ t        | j                               t        | j                  j                  | j                  |      S r   )r  r  r  r   rB  r9  rB  r{  r   rH  r   r  r   r  )r  r  s     r   r  z!ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!8949944diiIIdii3DT$))_D349911499gFFr   c                P   t        | j                  t              r{t        j                  | j
                  j                         | j
                  j                               \  \  }}}| j                         j                  }t        d |D              sJ |D cg c]_  }t        |t        j                        rCt        |j                  |D ci c]#  }|dk7  s	|t        j                  j                   % c}      a }}}|rt        | j
                  t"        t$        f      r| j
                  j'                  ||      }n|}|D cg c],  }t(        j*                  j,                  j/                  ||      . }	}ddlm}
  |
|	| j5                               S yc c}w c c}}w c c}w )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  p   K   | ].  }t        |t        j                  t        j                  f       0 y wr   )r   rC   StarDep	MemoryDepr  s     r   r   z0ComputedBuffer.get_fill_order.<locals>.<genexpr>  s0       1|33\5K5KLMs   46r   rA   pick_loop_orderN)r   rC  r  rC   rz  rB  r  r)  r  r  r   r  rj   r   r   r  r  rB  r{  r   rn   r   r   r|  	schedulerr  r   )r  
index_varsrI  r   r  r  vrk  r,  stride_lengthsr  s              r   r   zComputedBuffer.get_fill_order  sj    dkk>2.:.M.M		,,.		0L0L0N/+(Z! ((*00E      a!7!78 177n$WPQUVPVQ_$WXE  dii$6"ii//
NKG(GMR"EIAGG$$11$@" " 7&~t}}GG# %X"s$   3F
FF6	F1F#Fc                    t        | j                  t              r5| j                         }|r| j	                  |       y | j                          y y r   )r   rC  r  r   r  r	  r  s     r   rh  zComputedBuffer.decide_layout  s@    dkk>2'')E2259""$ 3r   c                R   t        j                  | j                         | j                         d      \  }}t	        j
                  t        d| j                               5  t        | j                         | j                         r|n|d d |g| }d d d        g }g }g }g }|j                         D ]^  \  }}	||d   v r'|rJ |j                  |       |j                  |	       4||d   v sJ |j                  |       |j                  |	       ` ||f||ffS # 1 sw Y   xY w)Nqr}   r  rA   r   )rC   rz  r  r)  r   r   r  r  rN   r  r'  itemsr  )
r  r   
var_rangesr  r  reduce_vars
index_sizereduce_sizer  r   s
             r   r  z%ComputedBuffer.get_default_sizes_body  s@    (::##%t'>'>'@
j \\.*;T__=NO 	'')002Ra 	D	 
!#
$$& 	&DAqDG|&&!!!$!!!$DG|#|""1%""1%	& K($[0III)	 	s   !2DD&c                     j                         \  \  }}}\  }}|r |||f|||f      \  \  }}}\  }}g |j                  j                         |t        |t              rt        |      dk(  sJ |\  }}	t        |t              sJ t        |             t        |	t              sJ t        |	             t        d |	D              sJ |j                  }
|
|k(  s	J |
|f       |	D cg c]	  }|vs| }	}|	z  g |j                         t        j                  j                   t        j                         sj#                  |j%                                	 	 	 	 	 	 	 	 	 	 d fd}||z   }t'        t)                      xs t*        j,                   } |||||      \  }}} |||||      \  }}}t/        j0                  ||d      \  \  }}}t3        | ||       ||      g|||      }||f|fS c c}w )an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        r   c              3  <   K   | ]  }t        |t                y wr   )r   r    )r   fs     r   r   z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>c  s     Hqz!T*H   c           	        j                  | ||      \  }}}j                         dk(  rat        |      dk(  rSt        t	        t        |                  } ||      d   dk7  r(|D cg c]  }||   	 }}t        |      }t        |      } ||       } |rGt        j                  j                  j                  | |t        | |            \  }}	}
t        ||	      }n|}|||fS c c}w )Nr  r   r   )_apply_loop_reorderingr'  r   r   r   r   r   rn   r   r   _simplify_loopsrG   r   )x_varssupport_varsr  simplify_loopsnewsizesreindex0r   r   r   r   _pruner   index_formulasmemory_addrsr  s               r   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reordert  s     ,0+F+Fe\,(Hh* &&(E1c%jAoU3u:./ E?1%*278Qa8H8+E2H.u5H f%F-.WW-=-=-M-M,^VXN.*(F
 *(H="Wh..!  9s   *C&pr}   )
r  Sequence[sympy.Symbol]r  r  r  r\  r  r   r   dtuple[list[int], Callable[[Sequence[int]], Sequence[int]], Callable[[Sequence[int]], Sequence[int]]])r  indexing_exprsr   r   r   r   r   r   r   r   r  get_write_exprsrn   r   rq  rD   PREFER_STORE_LOOP_ORDERextendget_read_exprsre   r  rB   loop_ordering_after_fusionrC   index_vars_no_squeezerN   )r  extra_indexing_constraintsrecompute_sizes_body_funcr  r  r  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  r  r  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r  r   s   `                     @@r   r  z#ComputedBuffer.simplify_and_reorder5  s   4 '')		
%Z%Z %
 *[)4*k1J	)[)[
 94..5578%15u=23q89 :T6!#63T:WDAV<WW:148S$?R:SS8H4GHHHH"&//&*?? #%B ? /#!>2I# # 11N0--/0ww""4)O)OP 3 3 564	/*4	/04	/ !4	/ !	4	/

4	/l "K/t,--VV5V5V1V 	 (<	(
$\1 ,@{4F,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11#s   -	G?7G?c           
     X   ddl m} |g }	 |D cg c]-  }t        j                  j                  j                  || |      / }}t        |      t        |      k(  rt        |d         t        |       k(  sJ t        t         ||||                  }|D 	cg c]  }	||	   	 }}	|t#        |      t%        |      fS c c}w # t        $ rZ t        j                  r*t        j                  dt        t        | |            |       t        t!        t        |                  }Y w xY wc c}	w )zU
        Shuffle the order of loops around to hopefully improve performance.
        rA   r  r   z%Did not simplify complex index:
%s
%s)r  r  rn   r   r   r|  r   r   r  	ExceptionrB   r  ry  warningr   r   r   r   r   )
r  r  r  r   priority_idxr  r,  rJ  r   r   s
             r   r  z%ComputedBuffer._apply_loop_reordering  s'    	/L	, )   --dJMG  w<3|#44WQZCM :   /'5,"OPQE $))aq))l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 2B<AC D'<C A D$#D$c                6    | j                   j                         S r   )rB  r  r  s    r   r  z!ComputedBuffer.get_pointwise_size      yy++--r   c                6    | j                   j                         S r   rB  r)  r  s    r   r)  z!ComputedBuffer.get_reduction_size  r  r   c                6    | j                   j                         S r   rB  r'  r  s    r   r'  z!ComputedBuffer.get_reduction_type  r  r   c                6    | j                   j                         S r   )rB  r  r  s    r   r-  zComputedBuffer.is_no_op  s    yy))++r   c                     yr  r   r  s    r   r  zComputedBuffer.should_allocate   r  r   c                8    | j                   j                  |      S )r  rB  r/  r  s     r   r/  z!ComputedBuffer.constant_to_device  s    yy++F33r   )r   Iterator[None]rH  rc  rb  r9  r`  rZ  re  rO  rC  )r   zCallable[..., None])r   Optional[list[int]]r8  )r   zMtuple[tuple[list[Expr], list[Expr]], LoopBody, tuple[list[Expr], list[Expr]]]NN)r  *Optional[tuple[dict[Any, Any], list[Any]]]r  r  r   z8tuple[tuple[list[Expr], list[Expr]], Optional[LoopBody]]r   )r  r  r  r  r  r\  r   zlist[sympy.Expr]r  r&  r   r  rD  rf  )&r   r   r   rh  r   r  r  r  r  r  rl  rm  r  rk  r  r  r   r  r  r  rY   r%  r  r  r  r   rh  rX   r  r  r  r  r)  r'  r-  r  r/  r  r  s   @r   rx  rx  P  s    K%*NN* "&K%7;4;5929?C <C %  %D 6  6	%%** ,-$)!	! .2	%IG%N% J
J JD RVBFU2$NU2 $@U2 
B	U2n  -1%B*%B,%B %B '	%B
 *%B
%B %BN...,4r   rx  c                  v     e Zd ZdZ	 	 	 	 	 	 	 	 d	 fdZd
dZdddZddZddZddZ		 	 d	 	 	 	 	 ddZ
 xZS )r{  zt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    c                    t         |   d |       t        j                  |      | _        || _        t        j                  j                  |       | _	        t        j                  j                  |        i | _        y r  )r@  r  ry  unwrap_storager_  make_kernel_renderrn   r   register_bufferr   register_operationr   )r  rC  r_  r,  rB  s       r   r  zTemplateBuffer.__init__  sa     	d62"11&9"4GG++D1		""4(+-r   c                &    | j                  d      S )NT	normalize)rL   r  s    r   r  zTemplateBuffer.get_read_writes  s    ''$'77r   c           	     d   | j                         | j                         j                         dfd}t        j                  || j                         d|      }| j                  D ]  t        t        t        f      sJ t                     t        j                  t              sJ t        j                               j                  j                         dfd}|xj                  t        j                  |j                         d|      j                  z  c_         |S )Nc                ^    t        |      dk(  sJ t        j                   |       d      S )Nr   fake)r   rl   r  )r   rN  r  r   s     r   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummy#  s,    v;!###99T75>6::r   r   r0  c                x    t        |      dk(  sJ t        j                  j                          |             S r  )r   rl   rZ  rh  )r   rN  r  ra  s     r   r5  z1TemplateBuffer.extract_read_writes.<locals>.dummy1  s0    6{a'''xx??r   )r   Sequence[Any]rN  r7  r   r   )rh  r  r  rC   rL   r   r_  r   rH  rg  r   rC  r  r  )r  r1  r5  depsr  ra  r   s       @@@r   rL   z"TemplateBuffer.extract_read_writes  s    }}//#002	; //4==?B)
 ;; 	CcOV#<=HtCyH=cjj&1C4

3CC1jj--/G@
 JJ,::s||~rYeJ	 r   c                6    t         j                  j                  S r   )r   r  r3  r  s    r   r)  z!TemplateBuffer.get_reduction_size<  s    ww{{r   c                     y r   r   r  s    r   r'  z!TemplateBuffer.get_reduction_type?  r  r   c                     yr  r   r  s    r   r  zTemplateBuffer.should_allocateB  r  r   c                *    | j                         g fd fS r   r  )r  r  r  s      r   r  z#TemplateBuffer.simplify_and_reorderE  s$      
 	
r   )rC  r  r_  Sequence[IRNode]r,  r  r   r   r`  rZ  )r1  r   r   ra  rD  rH  rC  r'  )r  r(  r  r  r   z<tuple[tuple[Sequence[Expr], list[Expr]], Optional[LoopBody]])r   r   r   rh  r  r  rL   r)  r'  r  r  r  r  s   @r   r{  r{    sy    
.. !. 9	.
 
.8:
 RVBF
$N
 $@
 
F	
r   r{  c                       e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 d fdZ ed       	 d	 	 	 d	 fd       Zd
dZddZddZ xZ	S )TritonTemplateBufferc           
        t         |   |||       || _        | g| _        |t	        | j
                  d   t              sJ t        | j
                  d                | j
                  d   j                         }| xj                  |D cg c]  }t        t        |      ||        c}z  c_        |r|n	t               | _        d| _        d| _        yc c}w )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nr   r  )r@  r  mutated_inputsoutputsr   r_  r   r   r  MutationOutputr\  r:   allowed_prologue_inpssubgraph_inpssubgraph_outs)	r  rC  r_  r,  rA  rD  r   rm  rB  s	           r   r  zTritonTemplateBuffer.__init__T  s    " 	);<,&*V%dkk!nf5KtDKKN7KK5[[^..0FLL) z8#tD L &;!
 	" SW?Cs   Cc                   t         |   |      }| j                  r| j                  ng }| j                  r| j                  ng }|D ]m  }t	        |t
        j                        r|j                  t        ||             9t	        |t              r!|j                  |j                  |             j|mJ  |D ]7  }t	        |t              r!|j                  |j                  |             4|7J  |S r   )
r@  r%  rF  rE  r   r   r    updater(   r   )r  r^  resrF  rE  ra  r   rB  s          r   r%  z)TritonTemplateBuffer.get_free_symbol_usesw  s     g*=9.2.@.@**b.2.@.@**b  	#C#uzz*

+C?@C(

333MBC{"{	# ! 	#C#v&

333MBC{"{		# 
r   c                    | j                   S r   rB  r  s    r   r  z TritonTemplateBuffer.get_outputs      ||r   c                    | j                   S r   )rD  r  s    r   get_allowed_prologue_inpsz.TritonTemplateBuffer.get_allowed_prologue_inps  s    )))r   c                &    d| j                    d}|S )NzTritonTemplateBuffer(layout=r  rb  )r  r   s     r   r  zTritonTemplateBuffer.__str__  s    ,T[[M;
r   r'  )rC  r  r_  r=  r,  zOptional[Callable[_P, _T]]rA  Optional[Iterable[IRNode]]rD  zOptional[OrderedSet[str]]r   r   rZ  re  r  r9  rU  )
r   r   r   r  rY   r%  r  rN  r  r  r  s   @r   r?  r?  S  s     6:;?!D!D !!D 7	!D
 3!D  9!D 
!DF 23$)!	! 4.*r   r?  c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d fdZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZ xZS )ChoiceCallera1  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUTLASSTemplateCaller.
    c                v    t         |           || _        || _        || _        || _        d| _        i | _        y r  )r@  r  r   rC  r   descriptionfailedr   )r  r   r   rC  rT  rB  s        r   r  zChoiceCaller.__init__  sA     		& '! ,.r   c                   | j                         t        j                  rt        fd      S t	        j
                  d|id       S )Nc                        S r   r   )algor   s   r   r_  z(ChoiceCaller.benchmark.<locals>.<lambda>  s    D$K r   r   r  )to_callablerB   /profile_bandwidth_with_do_bench_using_profilingr^   rS   	benchmark)r  r   r   rX  s     `@r   r[  zChoiceCaller.benchmark  sA    !AA+,?@@$$T4%dKKr   c                    t         r   ru  r  s    r   	call_namezChoiceCaller.call_name  rv  r   c                    t         r   ru  r  s    r   rY  zChoiceCaller.to_callable  rv  r   c                "    | j                         S )z
        Hash key for the underlying kernel. By default, we assume there are no
        runtime params, so kernel hash key defaults to choice caller's hash key.
        )hash_keyr  s    r   kernel_hash_keyzChoiceCaller.kernel_hash_key  s    
 }}r   c                    t         r   ru  r  s    r   r`  zChoiceCaller.hash_key  rv  r   c                    t         r   ru  r  s    r   rZ  zChoiceCaller.output_node  rv  r   c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictzChoiceCaller.info_dict  s    	r   c                     y)Nunsupported_choicer   r  s    r   autoheuristic_idzChoiceCaller.autoheuristic_id  s    #r   c                    d| _         y)z
        Mark the choice as failed so that it can be
        removed later. Useful for when we decouple
        compilation and tuning.
        TN)rU  r  s    r   mark_failedzChoiceCaller.mark_failed  s     r   )
r   r   r   r  rC  r  rT  r   r   r   )r   r   r   r  r   r  rU  )r   r  )r   r   )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]]r8  )r   r   r   rh  r  r[  r]  rY  ra  r`  rZ  re  rh  rj  r  r  s   @r   rR  rR    sk    .. ". 	.
 . 
.(L""""$r   rR  c                      e Zd ZddZy)TritonTemplateCallerBasec                    t         r   ru  r  s    r   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  rv  r   N)r   r   )r   r   r   rn  r   r   r   rl  rl    s    "r   rl  c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 d
 fdZedd       Zedd       Z	 d	 	 	 ddZe	j                  dd       ZddZ	 d	 	 	 ddZ	 	 	 	 dd	Z xZS )MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    c                    t         |   ||d |       || _        i | _        || _        || _        t        d |D              | _        i | _        y )N)rC  r_  r,  rD  c              3     K   | ]R  }t        |t              xs< t        |t        j                  j                  j
                        xr |j                   T y wr   )r   rl  r  r  select_algorithmExternKernelCallerhas_out_variant)r   choices     r   r   z/MultiTemplateBuffer.__init__.<locals>.<genexpr>  sT      %
  v78 65??#C#C#V#VW +**%
s   AA)	r@  r  _choice_timings_fn_choice_timings_choicesoriginal_inputsr   _output_plannable_make_kernel_renders)r  rC  r_  choice_timings_fnunfiltered_choicesrD  rB  s         r   r  zMultiTemplateBuffer.__init__  sk     	#"7	 	 	
 #4OQ,>%!$ %
 -%
 "
 ?A!r   c                    | j                   S )z^
        Are all possible choices TritonTemplates or Extern Kernels with out variants
        )r{  r  s    r   output_plannablez$MultiTemplateBuffer.output_plannable  s    
 %%%r   c                    | j                   S r   )ry  r  s    r   rv  zMultiTemplateBuffer.choices  s    }}r   c                x    || j                   vr| j                  |      | j                   |<   | j                   |   S r   )rx  rw  )r  hint_overrides     r   choice_timingsz"MultiTemplateBuffer.choice_timings  s>      4 44262I2I-2XD  /##M22r   c              #  0  K   t        |t        j                  j                  j                        sJ t        |             | j                  |j                  k(  sJ | j                  }|j                         | _        	 d  || _        y # || _        w xY wwr   )	r   r  r  rs  TritonTemplateCallerr   rC  r,  rn  )r  callerrenders      r   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller  s     EOO44II
 	<	 
 {{fmm+++(("("?"?"A	-&,D#fD#s   A;B>B
 B
	BBc                N   t        |t        j                  j                  j                        sJ t        |             | j                         |j                  j                  k(  sJ | j                         |j                  j                  k(  sJ |j                         | _        y r   )r   r  r  rs  r  r   r   rC  r   r(  r   rn  r,  )r  r  s     r   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller-  s    EOO44II
 	<	 
 }}&--"4"4444 FMM$8$8888"("?"?"Ar   c                b    | j                  |      }t        ||j                        }|||   fS )N)r  r`  )r  r	  r  )r  r  timings
min_choices       r   get_min_choicez"MultiTemplateBuffer.get_min_choice5  s7     %%M%Bgkk2
GJ/00r   c                    |j                         D ]"  \  }}|j                         | j                  |<   $ | j                  d   | _        y)z;Finalize with multiple callers for different hint overridesN)r  rn  r|  r,  )r  callersr  r  s       r   finalize_as_triton_callersz.MultiTemplateBuffer.finalize_as_triton_callers<  sN     &-]]_ 	W!M67=7T7T7VD%%m4	W #'";";D"Ar   )rC  r  r_  r=  r}  z4Callable[[Optional[int]], dict[ChoiceCaller, float]]r~  list[ChoiceCaller]rD  r:  r   r   rC  )r   r  r   )r  rW  r   zdict[ChoiceCaller, float])r  rl  r   r%  )r  rl  r   r   )r  rW  r   ztuple[ChoiceCaller, float])r  z-dict[Optional[int], TritonTemplateCallerBase]r   r   )r   r   r   rh  r  rn  r  rv  r  rl  rm  r  r  r  r  r  r  s   @r   rp  rp    s    AA !A P	A
 /A  /A 
A8 & &   .23*3	"3 - -B .21*1	#1BDB	Br   rp  c                  L     e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZddZ xZS )CUTLASSTemplateBufferc                R    t         |   |||       || _        || _        || _        y r   )r@  r  workspace_sizetemplatesupports_epilogue_fusion)r  rC  r_  r,  r  r  r  rB  s          r   r  zCUTLASSTemplateBuffer.__init__H  s.     	);<, (@%r   c                6    | j                   | j                   S dS r  r  r  s    r   r  z(CUTLASSTemplateBuffer.get_workspace_sizeW  s    &*&9&9&Et""L1Lr   c                x    | j                         D ]'  }t        j                  |j                         d d        ) y r   )r  rl   r  rh  )r  rQ  s     r   emulate_store_fnz&CUTLASSTemplateBuffer.emulate_store_fnZ  s1    &&( 	5FIIfoo't4	5r   )rC  r  r_  r=  r,  Callable[_P, _T]r  r   r  rr   r  r   r   r   rc  r8  )r   r   r   r  r  r  r  r  s   @r   r  r  G  s_    AA !A -	A
 A "A #'A 
AM5r   r  c                  D     e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 d fdZd fdZ xZS )CppTemplateBufferc                R    t         |   |||       || _        || _        d | _        y r   )r@  r  r  rv  rB  )r  rC  r_  r,  r  rv  rB  s         r   r  zCppTemplateBuffer.__init__`  s,     	);< /3r   c                v   t        | j                  t              rt        | j                  t              sJ t        | j                               | j                  d   }t        |t              sJ t        |             |j                  }t        |t              sJ t        |             |S t        | %         S r  )
r   rC  MultiOutputLayoutrB  r   r   rg  r  r@  r  )r  first_outputrC  rB  s      r   r  zCppTemplateBuffer.get_layoutm  s    dkk#45dllH5ItDLL7II5<<?LlF3GT,5GG3!((Fff-;tF|;-M7%''r   )rC  r  r_  r=  r,  r  r  rr   rv  r   r   r   rA  )r   r   r   r  r  r  r  s   @r   r  r  _  sL    44 !4 -	4
 "4 4 
4
( 
(r   r  c                  F     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )CuteDSLTemplateBufferz
    Buffer for CuteDSL (CUTLASS Python DSL) template kernels.
    Similar to other template buffers but specialized for CuteDSL operations.
    c           
        t         |   |||       || _        || _        | g| _        |t        | j                  d   t              sJ t        | j                  d                | j                  d   j                         }| xj                  |D cg c]  }t        t        |      ||        c}z  c_        y y c c}w )Nr   r  )r@  r  r  rA  rB  r   r_  r   r   r  rC  r\  )	r  rC  r_  r,  r  rA  r   rm  rB  s	           r   r  zCuteDSLTemplateBuffer.__init__  s     	);< ,&*V%dkk!nf5KtDKKN7KK5[[^..0FLL) z8#tD L &s   B<c                    | j                   S r   rK  r  s    r   r  z!CuteDSLTemplateBuffer.get_outputs  rL  r   r   )rC  r  r_  r=  r,  r  r  r   rA  rP  r   r   r  )r   r   r   rh  r  r  r  r  s   @r   r  r  z  sQ     6: ! -	
  3 
*r   r  c                       e Zd ZdZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZd	dZ	 d
	 	 	 	 	 ddZ xZS )NVUniversalGemmBufferz
    Buffer for NVIDIA Universal GEMM kernels.

    Unlike CuteDSL templates which use Jinja templates, this generates
    simpler Python code that directly calls the cutlass_api library.
    c                0   t         |   ||d        || _        || _        | g| _        || _        || _        || _        || _        |	| _	        |
| _
        |j                  j                  |j                  j                  d| _        | j                  | _        y )N)r,  )kernel_namemin_cc)r@  r  kernelaccumulator_typerB  r  variantscale_type_ascale_type_bswizzle_type_aswizzle_type_bmetadatar  r  kernel_metadata_make_kernel_renderr,  )r  rC  r_  r  r  r  r  r  r  r  r  rB  s              r   r  zNVUniversalGemmBuffer.__init__  s     	DA 0&*V,((,, "??66oo,, 
 #'":":r   c                    | j                   S )z#Return the workspace size in bytes.r  r  s    r   r  z(NVUniversalGemmBuffer.get_workspace_size  s    """r   c                    | j                   S r   rK  r  s    r   r  z!NVUniversalGemmBuffer.get_outputs  rL  r   c                  	 ddl m} ddlm} g }| j                  D ]K  }t        |t              r|j                  }t        |t              r|j                  }|j                  |       M t        |j                        } ||||| j                  | j                  | j                  | j                  | j                   | j"                  | j$                  | j&                        		fd}	|fS )z
        Create a kernel renderer for code generation.

        Returns (kernel, render) tuple where:
        - kernel: NVUniversalGemmKernel object with call_kernel() method
        - render: function that returns source code string
        r   )NVUniversalGemmKernel)Placeholder)r  r   rZ  r  r  r  r  r  r  r  r  c                 $     j                         S r   )r  )render_kernels   r   r  z9NVUniversalGemmBuffer._make_kernel_render.<locals>.render  s     ''))r   )Btorch._inductor.codegen.nv_universal_gemm.nv_universal_gemm_kernelr  torch._inductor.utilsr  r_  r   r   rB  rf  r  r   KERNEL_NAMEr  r  r  r  r  r  r  r  )
r  out_noder  r  r  r   ra  r  r  r  s
            @r   r  z)NVUniversalGemmBuffer._make_kernel_render  s    	
 	6!#;; 	$C#y)hh#z*hhs#	$ +112-##  00!22..LL****....
	* f$$r   )r   NNNN)rC  r  r_  r=  r  r   r  r   r  r   r  r   r  Optional[Any]r  r  r  r  r  r  r   r   rc  r  r   )r  r   r  rW  r   ztuple[Any, Any])	r   r   r   rh  r  r  r  r  r  r  s   @r   r  r    s      &*&*(,(,;; !; 	;
 ; ; ; $; $; &; &; 
;B# =A*%*%,9*%	*%r   r  c                &    t        d | D              S )Nc              3  <   K   | ]  }t        |t                y wr   r   r   r   r  s     r   r   z#is_node_sequence.<locals>.<genexpr>  s     4z!V$4r  )r   )r   s    r   is_node_sequencer    s     4e444r   c                      e Zd ZU ded<   ddZddZddZedd       Ze		 	 	 	 dd       Z
ddZdd	Z ed       	 d	 	 	 dd
       Zy)ry  )Sequence[Union[IRNode, Sequence[IRNode]]]r_  c                d    | j                   |   }t        |t              sJ |j                         S r   r_  r   r   rh  )r  r   inputs      r   
input_namezInputsKernel.input_name   s,    A%(((~~r   c                   t        t        j                            }t        j                  | j                  D ]c  }t        |t              r|j                  fd|D               .t        |t              r?|j                   |j                                      e t        t        j                     fd| j                         D              }t        j                  ||t                     S )Nc              3  J   K   | ]  } |j                                 y wr   r  )r   r   r  s     r   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>
  s     BqWQZZ\2B    #c              3  J   K   | ]  } |j                                 y wr   r  )r   rm  r  s     r   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>  s!      .
(+GCLLN#.
r  r  )r:   rC   rI   r  r_  r   r   rH  r   r  rh  r  r  )r  r  r  r  r  s       @r   r  zInputsKernel.get_read_writes  s    <++,.&&[[ 	5E%*BEBBE#89		'%.."234	5 L,,- .
/3/?/?/A.
 
 &&"
 	
r   c                6    | j                         j                  S r   r  r  s    r   r  zInputsKernel.get_reads  r  r   c                   t        |t              r|j                  }t        |t              r|j                  }t        |t              r%t        |t
              st        j                  |      }t        |t              r| j                  |      S t        |t              r|S t        |t        t
        f      sJ t        |             |S r   )r   r   rB  rf  rd  rH  r  realize_inputunwrap_storage_for_inputTorchBindObjectrg  r   r  r   s     r   r  z%InputsKernel.unwrap_storage_for_input  s    a#Aa$Aa":a+I**1-Aa#
 //22a)H!fo67@a@7r   c                    g }| D ][  }t        |t              r#|D cg c]  }t        j                  |       }}nt        j                  |      }|j	                  |       ] |S c c}w r   )r   r   ry  r  r  )r_  
inputs_newr   r   s       r   r+  zInputsKernel.unwrap_storage1  sl     =?
 	!A!X&GHI!\::1=II 99!<a 	! 	 Js   A%c                     yr  r   r  s    r   r+  zInputsKernel.is_extern>  r  r   c                     yr  r   r  s    r   r   zInputsKernel.num_readsA  rj  r   c                    t        t        j                            }| j                  D ]B  }t	        |t
              r||j                  |      z  }(|D ]  }||j                  |      z  } D |S r   )r:   r   r"   r_  r   r   r%  )r  r^  r  ra  	inner_inps        r   r%  z!InputsKernel.get_free_symbol_usesD  sv     u||$&;; 	GC#v&S--m<<!$ GI77FFAG		G r   N)r   r   r   r   r`  rb  r'  )r_  r  r   z%list[Union[IRNode, Sequence[IRNode]]]rC  rc  rZ  re  )r   r   r   r   r  r  r  r  r  rk  r+  r+  r   rY   r%  r   r   r   ry  ry    s    55 

,,  $ 
9
	.
 
 N+$)
!
	!
 ,
r   ry  c                      e Zd ZddZddZy)	NopKernelc                     yr  r   r  s    r   r-  zNopKernel.is_no_opS  r  r   c                    t               S r   r9   r  s    r   r  zNopKernel.get_readsV  r  r   NrC  rb  )r   r   r   r-  r  r   r   r   r  r  R  s    r   r  c                      e Zd ZdZedd       Ze	 d		 	 	 	 	 d
d       Z ed       	 d	 	 	 dd       Zedd       Z	ddZ
y)ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    c                z
   |d   j                         }|d   j                         }t        |d   j                               }dg}||   g}d|cxk  rt	        |      k  sJ  J t        dt	        |            D ]  }||   j                         }	|j                  ||          t	        |	      t	        |      k(  sJ ||   j                         |k(  sJ ||   j                         |k(  sJ t        t	        |            D ]I  }
|
|k(  r||
   |	|
   z   ||
<   t        j                  j                  j                  ||
   |	|
         ||
<   K |j                  ||           t        j                  |      }t        j                  r$t        j!                  |||d   j"                        }t        t	        |            D ]k  }||   }t%        |      s|j'                         }t)        |t*              s5t        j-                  |j.                  |j0                        s`t3        |      } n t5        d |D              }t        j                  j6                  j8                  d   }t)        |t              sJ t;        |             |du rt5        d |D              rt3        |      }t=        d |D              }|J t?        dt+        |||||      g 	      }tA        |      }g }tC        |      D ]  \  }}t)        |tD        tF        f      sJ t;        |             | jI                  |tJ        jM                  ||||   ||   d
            }t)        |tN              sJ t;        |             t)        |jP                  t              sJ t;        |jP                               |jP                  j                  |       t)        |jR                  tD              r|jR                  jU                         }n|jR                  }t)        |t@              s|jW                         s1|j                         x}EtY        |j:                        s\t[        |      ri|j                  |j]                                 t	        |      dkD  rMt        j                  j_                  |t`        jb                        rt        j                  je                  |       t        j                  jg                  |      |_4        | jk                  |jP                        |_(        t        j                  jm                  |       |S )z6
        Create the concat kernel from inputs
        r   rA   c              3  2   K   | ]  }t        |        y wr   )r  r{  s     r   r   z&ConcatKernel.create.<locals>.<genexpr>  s     -W1.CA.F-Wr|  Fc              3     K   | ]p  }d |j                   v xr\ |j                   d    j                  t        j                        xs- |j                   d    j                  t        j                         r yw)r  r%  N)rW  r  r  r   r"  r   args     r   r   z&ConcatKernel.create.<locals>.<genexpr>  ss      
<
  SXX  --E<O<O-P W88E?00u?U?U0V
<
s   A6A8c              3  j   K   | ]+  }t        |      xr |j                         j                   - y wr   )r  r  rG  r{  s     r   r   z&ConcatKernel.create.<locals>.<genexpr>  s/      
FG!!$A)A)AA
s   13N)r   r   r   r   rG  r   rC  r_  r  )7r  r  r   r   r   r   r  rn   r   r   rt  r  r  rB   r  r  r  r   r  r  r   rE  r  r   r   r-   r  current_noder   r   r   r  rf  r   rd  rp  rx  rx  r  rg  r_  rB  re  r  re   rd   r3  rq  rD   FOREACHregister_operation_listr-  r   r+  r.  )r  r_  r8  r   r   r  offsets_startoffsets_endr   
input_sizer  output_strider   rC  any_input_is_storage_and_layoutfx_node_argsrG  concat_kernelr  op_namesra  rO  input_unwrappeddevs                           r   r  zConcatKernel.create`  s   
 %%'q	##%q	**,-}oC'#h-'''''q#f+& 	.A++-J  #/z?c(m333!9&&(E111!9'')V3333x=) 8"*1+
1"=HQK"#''"2"2"L"L Z]#HQK	 x}-	. (6'H'H'R''"//xM
 s6{# 		Aq	A$Q'K88fmmT$B8$LM		 +.-WPV-W*W'ww++003,-AtL/AA-*e3 
<
 $
<
 
9
 ;8DM 
KQ
 
	 !!!$$# 

 M*' 	CFAscHj#9:EDIE:++  Cq!1;q> ! L lF3GT,5GG3m22D9U4@T@T;UU9  ''5#((H-"%(("6"6"8"%(( ?J7#335NN,,S9388$"<0 ? ? AB1	C4 x=1!4!4V^=S=S!TGG++H5WW44]C"11-2F2FG	""=1r   Nc                   t        |t              r| j                  |j                  |      S t        |t        t
        f      sJ t        |             t        |j                  t              rt        |j                  j                  t              r|j                  j                  sy|yt        |j                               t        |j                               k7  ryt        d t        |j                         |j                               D              S t        |j                  d      xrA t        |j                  j                  t               xr t        |j                  t"               S )NFTc              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wr   r>  r?  s      r   r   z=ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s3      B   88R@r@  rC  )r   r   can_realize_into_without_copyrB  rd  rf  r   rp  rC  rE  r  r   r(  r   r   r  r  ExternKernelAlloc)r  ru  rv  s      r   r  z*ConcatKernel.can_realize_into_without_copy  s    c9%44SXXsCC#*56AS	A6chh 34sxx<xx00 { 3>>#$CNN,<(== !#.."2CNN4DE   CHHh' <388??N;<sxx):;;	
r   c                .    t         j                  | |      S r   )r  r%  r$  s     r   r%  z!ConcatKernel.get_free_symbol_uses  s     --dMBBr   c                ^   t        |t              s&t        |      rt        |      \  }}t        ||      }t        |t              sJ t	        |             t        |t
              r| j                  |j                  |      S t        |t              r`|j                          t        |j                  d      sJ | j                  ||      r&t        |      |j                  _        |j                  S t        j                  |j!                         |j#                         |j%                         t'        |j)                         |j)                               D cg c]/  \  }}t*        j,                  j.                  j1                  ||      1 c}}      }| j                  ||      S c c}}w )NrA  rC  r  )r   rH  r  rD  r   r   rx  rB  rf  r  r  r  rE  rC  r  r  r  r  r  r   r   rn   r   r   rt  )r  ru  rv  rK  rC  r  r  pws           r   rx  zConcatKernel.realize_into  sL   
 #/$S)"7"<%76B#/:c:/c9%##CHHc22c:&KKM388X...00c:"1#"6xx>>#--/__&  ?Aq   ::1a@	  
 C((s   4F)c                     yr  r   r  s    r   r  zConcatKernel.should_allocate  r  r   )r_  r=  r8  r   r   rf  r   )ru  r   rv  r  r   r   rZ  re  )ru  r   rv  r   r   r   rC  )r   r   r   rh  r  r  r  rY   r%  rx  r  r   r   r   r  r  Z  s    
 o ob 26!
!
/!
	!
 !
F N+$)C!C	!C ,C
 ) )Br   r  c                      e Zd ZU dZdZded<    ej                  e      Z	ded<   dZ
d	ed
<   dZded<   dZded<    ej                  e      Zded<   dZded<   dZded<    ej                  e      Zded<   dZded<    ej                  e      Zded<    ej                  e      Zded<   	 	 	 	 	 	 	 dA	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dB fdZdCdZdDdZdEdZdEd Z	 dF	 	 	 	 	 dGd!ZdHd"ZdFdId#ZdJd$ZdKd%ZdLd&Ze dMd'       Z!e"	 	 	 	 	 	 	 	 dNd(       Z#e"dOd)       Z$e"dPd*       Z%e"dPd+       Z&e"	 	 	 dQ	 	 	 	 	 	 	 	 	 dRd,       Z'e"	 dS	 	 	 	 	 	 	 dTd-       Z(e"	 dS	 	 	 	 	 	 	 dUd.       Z)e"dPd/       Z*e"dPd0       Z+e"dPd1       Z,e"dPd2       Z-dEd3Z.	 	 	 	 	 	 dVd4Z/dFdWd5Z0dXd6Z1dYd7Z2dSdZd8Z3dLd9Z4dHd:Z5dHd;Z6dHd<Z7d[d=Z8d\d>Z9 e:d       	 dS	 	 	 d]d?       Z;dLd@Z<e<Z= xZ>S )^r  z
    A class that represents Kernels which are not directly lowered to Inductor
    Loop Level IR, such as custom operators, or aten operators which we fallback to.
    r   r7  constant_argsr^  rs  r   NOptional[ReinterpretView]output_viewrI  python_kernel_namecpp_kernel_nameIterable[str]ordered_kwargs_for_cpp_kernelOptional[_OpOverloads]op_overloadzOptional[list[dict[str, Any]]]arg_propertieszdict[str, dict[str, Any]]allarg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                D   t         |   |||       || _        |r|ni | _        || _        |
| _        | j                  |       | j                  |       |	| _        | j                          i | _
        g | _        t        j                  j                  | _        i | _        y Nr  )r@  r  r  r   r  r  set_cpp_kernel_nameset_python_kernel_namer
  collect_arg_kwarg_propertiesr  r  rn   r   r  fx_noder   )r  r   rC  r_  r  r   r  r  r  r
  r  rB  s              r   r  zExternKernel.__init__>  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww+++-r   c                     | g| j                   S r   )r  r  s    r   r  zExternKernel.get_outputs^  s    -t,,--r   c                    t               S r   r9   r  s    r   r  z%ExternKernel.get_unbacked_symbol_defsa  r  r   c                N   t        | j                  t        j                  j                        r\| j                  j
                  j                  D cg c]2  }|j                  s$|j                  |j                  |j                  d4 c}n+t        t        | j                              D cg c]  }i  c}| _        t        | j                  t        j                  j                        rP| j                  j
                  j                  D ci c]&  }|j                  |j                  |j                  d( c}ni | _        t        | j                  t        j                  j                        r| j                   sJ| j                  j
                  j                  D cg c]  }|j                  s|j                   c}| _        | j                  j
                  j                  D cg c]  }|j                  s| c}| _        y g | _        y c c}w c c}w c c}w c c}w c c}w )N)r   r   r  )r   r  )r   r  r  _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   r_  r  r  r
  schema_kwargs)r  r   r   s      r   r  z)ExternKernel.collect_arg_kwarg_propertiesd  s    $**EJJ,A,AB ))11;; || FFKK%&__ $C$456"6 	$ $**EJJ,A,AB ))11;; qOO
  	 d&&

(=(=>55$($4$4$<$<$F$F6 !,,AFF62  ++33==""D "$D? 76"s*   7H+	H+HH/H'H"9H"c                z    t        | j                  t              r!| j                          | j	                          y y r   )r   rC  r  apply_constraintr	  r  s    r   rh  zExternKernel.decide_layout  s-    dkk>2!!#  3r   c                    t        | |      \  }}|r|j                  |       |s| j                         }|r$ddlm}  || |d      }|j                  ||       y y )NrA   )'set_kernel_post_grad_provenance_tracingT)r+  )ra   make_commenttry_get_kernel_namer  r%  write_provenance_debug_handle)r  wrapperr  
origin_str_detailed_origin_strr%  debug_handles          r   codegen_commentzExternKernel.codegen_comment  sf     ,?tW+M(
(  ,224KFBkTL 11+|L r   c                    t         r   ru  r  r)  s     r   codegenzExternKernel.codegen  rv  r   c                   || _         t        j                  j                  r.t	        | j
                  t        j                  j                        sy | j
                  }| j                   |j                  dk(  rU|j                  dk(  r|j                  j                  d      d   n|j                  j                  dd      }d| d| _         y |j                  j                  | _         y y )Natenr  .r   r   z
at::_ops::z::call)r  rn   r   cpp_wrapperr   r  r  r  r  	namespace_overloadnamer   r  replacer  r   )r  r  r  opnames       r   r  z ExternKernel.set_cpp_kernel_name  s    .ww""*ejj33+
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (r   c                   || _         |y | j                  }|y t        |t        j                  j
                        rd|j                   | _         y |j                  j                  dd       d|j                   | _         y )Nztorch.ops.higher_order.._ops..ops.r3  )	r  r  r   r  r  HigherOrderOperatorr   r   r7  )r  r  r  s      r   r  z#ExternKernel.set_python_kernel_name  s    "4)!!>

 > >?(??P&QD# $$,,Xw?@&//ARS #r   c                0   ddl m} | j                         x}r|j                  nt        j
                  j                  }t        j
                  j                  r| j                  S t        j
                  j                  rt        t        j
                  j                  |      s(J t        t        j
                  j                               | j                  y t        j
                  j                  j                  | j                  |      S | j                  S )NrA   )CppWrapperCpu)codegen.cpp_wrapper_cpur>  r  r   rn   r   device_type
fx_wrapperr  r4  r   rf  r  get_c_shim_func_name)r  r>  dr   s       r   r'  z ExternKernel.try_get_kernel_name  s    :!%!22A29L9L77***WW  agg22MB D$$E B ##+77''<<$$f  ***r   c                .    | j                         }|J |S r   )r'  r  s     r   get_kernel_namezExternKernel.get_kernel_name  s!    '')r   c           	        t         j                  | j                         | j                         | j	                         | j                         | j                         | j                               }|j                          |S )N)r   r   r  r  rr  rp  )	r  r  r  r  r  r   r  r  r  )r   r  s     r   
copy_inputzExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	r   c                b	   ||d}t        j                  |      \  }g g }g }|D ]  }j                  t        |t              xr t        |t
                      d   r|j                  |       Lt        |t              r5t        j                  j                  j                  j                  |d       }|j                  |        	 	 	 	 	 	 dfd}	|D 
cg c]  }
| j                  |
       }}
|D ]  }
t        |
      st        |
d        g }|D ]  }
t        |
t              se|
j!                         t        j                  j"                  v r;|j                  t        j                  j"                  |
j!                                   yt        |
t              se|
j!                         t        j                  j$                  v r;|j                  t        j                  j$                  |
j!                                   t        |
t&              r!|j                  |
j)                                t        |
t*        j,                  j.                  j
                        ro|
j0                  j2                  }|
j0                  j4                  dk(  r|J |j                  t*        j6                  j8                  |   j;                                |j                  t=        |
d               |	||      \  }} ||i |}d }t        j>                  j                  x}rt        j@                  jB                  jE                  d	      }tG               }t        j@                  jH                  t*        jJ                  jL                  jN                  u r|d
   }tQ        t        j@                        }|5  tS        |t        j@                  |       d d d        tU        |||      }t        |tV        tX        f      s|gn|}|D ]  }t        |t*        jZ                        s|j\                  s+t^        j`                  r<d}t        j                  j@                  jB                  jE                  dd       x}r| d| }|t        j                  _1         ||||	|fS c c}
w # 1 sw Y   xY w)N)r   r   r&  )r  c                $   g }t        |       }t        |      }D ]9  }|r|j                  t        |              |j                  t        |             ; t        j                  |      }|j                  dg       |j                  di       fS )Nr   r   )r  r  r  pytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          r   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_args
  s     Fo.J!"56N* 8	MM$z"23MM$~"67	8
 %%fi8A55$aeeHb&999r   Tr  r  )r   r  rA   zEsparsity not handled. Please file issue for sparse inference weights.r  z Found from : 
 )rL  r   rM  r   r   ztuple[list[_T], dict[str, _T]])2rJ  tree_flattenr  r   r   GeneratorStater    rn   r   r   r   create_symintnoder  r  rD  rd  rh  	constantstorchbind_constantsr  	get_valuer  r  irr   r   r   r  default_generatorsclone_stater   r  r  rW  r  r
   r  _higher_order_opseffectswith_effectsr/   r4   r0   r   r   Tensor	is_sparserB   graph_partitiondisable_cudagraphs_reason)r  r  r   r   binded_args	args_flattensor_argsnon_tensor_argsr  rS  r   example_argsdevice_indexnew_args
new_kwargsexample_outputr  r   node_meta_valctxexample_out_lir  msgr  rQ  rR  s                           @@r   process_kernelzExternKernel.process_kernel  s     $v6%22;?	9%' 		,C  3'O
30O,O R ""3'c4(''**44FFsQUFVC&&s+		,	:)	:@L	:+	: 6AAs((+AA  	6A$Q'%a5	6 	 	  	LA a*qzz|qww?P?P/P##AGG$5$5ajjl$CDq(+JJLAGG$?$??##AGG$?$?

$MNA/##AKKM2Au11@@A xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK'	L*  .lOL*8Z8JN---9-NN//33E:M0;C~~$$(?(?(G(G(T(TT -a 0<Q^^L K	1>>>JK 9>=! ntUm<  	   		8A1ell+KK..]"#''"6"6";";"?"?t"TT;T E!2;-@C471		8 
 	
Y BjK Ks   R 9R%%R.c                >   t        |t              sJ t        |             t        |t              r|S |j	                         }t
        j                  j                  |j                               }|J |j                         }|d|j                  v rt        |t        t        t        f      rt        |j                  t              r|j                  d   j                  t         j"                        s-|j                  d   j                  t         j$                        r)|j'                  t)        |j+                                      n|j-                          t/        j0                  |j+                         d      \  }}|d   } |j3                         |      }t
        j                  j4                  j7                  ||      }t
        j                  j4                  j9                  ||      }	t
        j                  j4                  j;                  ||      }
t=        ||	      |
z   }||k7  rt>        jA                  d|	|
|       tB        t        |jD                  tG        |jI                         |jK                         |j+                         |	|
d      	      S )
z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        r  r  r  r}   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sFrY  rA  )&r   rd  r   rH  re  rn   r   rh  rh  r  rW  rg  rp  rC  r  r  r  r   r"  r  r-   r   r	  rC   rz  r  r   r{  stride_vars
offset_varrf   ry  r  r  rB  rE  r  r  )r  r   x_unwrap_viewrm  x_unwrap_view_fx_node
index_argsr  rd  r   rJ  rF  expecteds               r   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_viewl  s6    !X&/Q/&a)H gg  !7!7!9: # 3 3 5 "-.333=?FJ*OP=//@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

J  ]
  ,  55eZH''""..ujA!!,,UJ?Z1F:HIIR	 &%,,.kkmZZ\

 
	
r   c           	        |
t               S t        |t        t        j                  j
                  j                  t        f      rt        |      S t        |t              rot               5  t        j                  j                  t        j                  |j                   |j#                         |j%                                     cd d d        S t        |t&              r|S t        |t(              r| j+                  |j,                        S t        |t.              r4t/        | j+                  |j,                        |j1                               S t        |t2              r;|j5                          t7        |j9                               r	 | j;                  |      S t        |t>              r|j5                          |S t        |t@        t        f      r|S | jC                  |      S # 1 sw Y   !xY w# t<        $ r Y cw xY w)N)r,  )r   r   rA  )"r  r   r    r   r   r   r   r   r   r  r;   rn   r   add_tensor_constantr  rI  r  r  r  r  r   r  rB  rH  r  rd  r  r  re  ry  r  rf  NonTensorObjrG  r  s     r   r  zExternKernel.realize_input  s   9'))a$ 3 3 ; ;SAB(a00a" () ww22LLallnU  a(Ha#$$QVV,,a)"&&qvv.q||~  a"IIK$Q]]_5::1== a$IIKHa,(=>?H~~a  3 " + s   ,AG(G5 (G25	H Hc                    t        |      r<t        |j                               dk(  r|S |j                         D ]  }|dk(  s	|c S  | j                  |      S r  )r  r   r(  rG  )r  r   r   s      r   require_stride1zExternKernel.require_stride1  sT     #1<<>"a',,. Q;H ~~a  r   c                2	   ||J |j                         dv r|s|S t        |      rt        |j                         t              r|rt        ||      xr$ t        |j                         j                         }t        |dd|rJt        t        j                  j                  j                  |j                         j                              n||       |S t        |ddd ||       |S t        |j                         t        t        f      rf|r|j                         j!                  |      s5|rCt#        ||j                         j                  |j%                               r|t'        ||      S |S t        |j                         x}t(              rwt        |j+                         x}t              rt-        d      t        |t              r<|r|j!                  |      s'|r't#        ||j                  |j%                               r|S t        |t.              rX|r|j                         j!                  |      s5|r5t#        ||j                         j                  |j%                               r|S t        |t0              rt        |j2                  t4              rt        |j2                  t6              st        |j9                         x}      rtt;        |d      rht        |j2                  t<              sN	 | j?                  |j2                        |_        |r| jA                  |||      S |r| jC                  |||      S 	 d }	|j%                         }
|t        j                  j                  }tG        tI        |j%                                     D cg c]<  }|jK                  ||   d	      r%|jM                  |j%                         |   d
      r|> }	}|	D ].  }tN        jP                  jR                  jU                  ||d	d      }0 | jW                  |      }t        |dd|||       |rt        ||      sJ |S |	r<|
|J tN        jP                  jR                  jY                  ||
      }t'        ||      S |S # tD        $ r Y Hw xY wc c}w )N)r   rA   TF)r  r  r  r  r  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutrB  r  r   r   rA   )-r  r  r   r  r  r  r2   r   rD  r   rn   r   r   size_hints_or_throwrE  rE  r  r;  r   rO  rf  rk  r	  rz  r   rB  rd  rH  re  r  r  ry  require_stride_orderrequire_exact_stridesr  r   r   r3  r~  r  r  loweringslice_rG  r  )r  r   r   r  r  use_current_stride_ordermutation_layoutrk  re  expanded_dims	orig_sizer   r   r8  s                 r   require_strideszExternKernel.require_strides  sS     M$===;;=F"=H !#!,,..9 0R50 0K3ALLN4I4IJJ - *#(-  8 - ! 0 0 D D$%LLN$9$9!" "'&3 H *#(-%)&3&3 HALLN[/,JK1<<>;;EB!1%q||~'<'<ajjl %0 4A}E 
 $%LLN25O $3$?$?$AA[N )b   [9{<<UC%5);+=+=qzz|
 H a%q||~77>-!1<<>#8#8!**,
 Hq)$1668,qvv7%Q]]_&DkEV,{//1BC88@335 4   #44= 5   # .2JJL	$ww''H s1::<0133M!4DaH11!**,q/1E M  % BOO,,33AsAqAB
 NN1!''	
 5a???  (]-FFF((//9=A21mDDW ' s   ;5R 1R AR	RRc           
         | j                  ||D cg c]4  }t        |t        j                        r|j                  j
                  n|6 c}|      S c c}w )N)r  r  )r  r   r  SymIntr   r,  )r  r   r  r  r   s        r   r  z"ExternKernel.require_exact_strides  sV     ""KXFGz!U\\:A ( # 
 	
s   9A
c                *    | j                  |||      S )N)r   r  )r  )r  r   r   r  s       r   r  z!ExternKernel.require_stride_order  s     ""1E"OOr   c                .    | j                  |t              S r   )r  r!  r  s     r   require_channels_lastz"ExternKernel.require_channels_last  s    ''+<==r   c                .    | j                  |t              S r   )r  r#  r  s     r   require_channels_last_3dz%ExternKernel.require_channels_last_3d  s    ''+=>>r   c                    dd} ||      r|S | j                  |t        j                  |j                                     S )Nc                    	 | j                         }|t        j                  j
                  v xr' t        j                  j
                  |   j                  S # t        t        f$ r Y yw xY wr  )rh  AttributeErrorr  rn   r   rW  	is_mkldnn)r   r   s     r   is_mkldnn_tensorz9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor  s]    zz| 177,,,R1B1B41H1R1RR #$78 s   A A)(A)r   r   r   r   r  r  r  r   )r  r   r  s      r   r  zExternKernel.require_contiguous  s@    	S AH,,>44QZZ\B r   c                h    | j                  |t        j                  |j                                     S r   r  r  s     r   require_contiguous_stridesz'ExternKernel.require_contiguous_strides  s-     ((~00>
 	
r   c                     y r   r   r  s    r   r#  zExternKernel.apply_constraint  r  r   c                   t        |t              sJ t        |             t        |t              st        |      }| j                  sJ d       t        |      }t        | j                        }||k  rqt        j                  d| j                  ||z
         t        ||      D ]>  }| j                  |   d   }|j                  ||v r||   n| j                  |   d          @ |S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )r   r   r   r   r  r   ry  r  r  r   r  )r  r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_argsz#ExternKernel.fill_non_provided_args  s     $)54:5)$%:D""U$UU"T,,-
 JII^  V#	 6:. ..q1&96) 8$,,Q/@ r   c                \   t         j                  j                  rGg }d }|r]| j                  rQt	        | j
                        t	        |      k(  sJ d       | j                  D ci c]  }|j                  d      | }}t        | j
                        D ]  \  }}|.|J |j                  ||         }|r|j                  d      nd }n\t	        | j                        |z   }	| j                  r6|	t	        | j                        k  r| j                  |	   j                  d      nd }|j                  t         j                  j                  j                  ||              |S | j
                  D 
cg c]+  }
t         j                  j                  j                  |
      - c}
S c c}w c c}
w )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )rn   r   r4  r  r   r  r  r   r_  r  rf  val_to_arg_str)r  r  r  name_to_arg_propertiesr  r   r   proptype_r   r  s              r   codegen_const_argszExternKernel.codegen_const_args  s   77F
 &*",,4--.#e*< Z< 594G4G*-0CGGFOS(*& * "$"4"45 M1)5 ,,,155eAh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!UKLM MDHDVDVWqAGG((77:WW'*& Xs   $F$10F)c                   t         j                  j                  rC| j                  7| j	                  g | j
                  | j                  | j                        }d}n| j
                  }d}g }t        |      D ]  \  }}t         j                  j                  r| j                  r|t        | j                        k  sJ d       | j                  |   j                  d      }|j                  t         j                  j                  j                  ||             |j                  t         j                  j                  j                  |              |r|j                  | j!                                |S )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rn   r   r4  r  r  r_  r  r   r   r  r   r  r  rf  r  r  r  )r  r_  need_codegen_constant_argsr   r   r   r  s          r   codegen_argszExternKernel.codegen_args   s5   774#3#3#?003$++3 2 23T[[F */&[[F)-&f% 	DDAqww""**q3t7J7J3K/K CK ++A.226:AGG00??5IJAGG00??BC	D &KK//12r   c                    ||v r|j                  |      S || j                  v r| j                  j                  |      S | j                  j                  |      x}|j                  d      S t        | d      )zGiven an argument name, queries for values in (in order):
        1. any provided kwargs for this function.
        2. the class self.kwargs member.
        3. any available default arguments in self.allarg_properties.r  z not in self.allarg_properties)r  r   r  r	  )r  r  r   r  s       r   get_kwargs_valuezExternKernel.get_kwargs_value  sx    
 v::h''t{{";;??8,,))--h77CD77?++z)GHIIr   c           	        t         j                  j                  r| j                  t	        | j
                        dk(  rg S g }| j                  D ]  }|r|dk(  r| j                  |      }t        |t              r|j                  |       >| j                  J | j                  j                  |i       j                  d      }|j                  t         j                  j                  j                  ||              |S | j                  j!                         D cg c]3  \  }}| dt         j                  j                  j                  |       5 }}}|S c c}}w )Nr   r   r   r  )rn   r   r4  r  r   r!  r
  r  r   r    r  r  r  rf  r  r   r  )r  skip_outr   r  r  r  ks          r   codegen_kwargszExternKernel.codegen_kwargs'  s=   77+D4F4F0G10L	F >> QE 1))(3a&MM!$11=== 2266xDHHPEMM!''"6"6"E"Ea"OPQ"  !KK--/Aq #Qqww++::1=>?F  	s    8Ec                    | j                   S| j                   j                  }t        |dd      }|j                  dd      }|j	                  dd      d   }| d| }|S d}|S )	Nr   unknown_namespacer:  r;  r3  rA   r   
unknown_op)r  r  r   r7  rsplit)r  r  op_namespaceop_names       r   get_op_namezExternKernel.get_op_nameA  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Gr   c                   t         j                  rt        j                  j                  st        | j                               dk(  ry t        j                  j                  j                  | j                               }t        j                  j                  j                  | j                               }| j                         }|j                  d| j                          d| d| d|d	       y y y )Nr   zassert_size_stride(r  r  )rB   size_assertsrn   r   r4  ri   r   rf  codegen_shape_tupler(  r  rh  rh  )r  r)  r   r   r  s        r   codegen_size_assertsz!ExternKernel.codegen_size_assertsL  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;r   c           	     H   t         j                  rt        j                  j                  sw| j                         }|t        j                  j                  v}| j                         }|r |j                  d| dt         d|d       y |j                  d| d| d       y y y )Nzassert_alignment(r  r  z	# buffer z (op: z) is assumed to be not aligned)
rB   alignment_assertsrn   r   r4  rh  r  r  rh  rb   )r  r)  r   alignedr  s        r   codegen_alignment_assertsz&ExternKernel.codegen_alignment_assertsX  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#r   c                    t         j                  j                  rt        j                  j
                  ry|j                          | j                         }|j                  d| d| d       y)zc
        Track outputs of fallback operators if config.test_configs.track_memory_lifecycle
        Nztrack_tensor(z, 'z'))	rB   test_configstrack_memory_lifecyclern   r   r4  "write_memory_track_allocation_oncerh  rh  )r  r)  r   s      r   codegen_memory_trackingz$ExternKernel.codegen_memory_trackingf  sV     ""99QWW=P=P224}}M$s4&;<r   c                N    | j                         }| j                         }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r   r(  )r  r  r  s      r   get_group_stridezExternKernel.get_group_strideq  s*     //#r{G##r   c                   t         j                  j                  }| j                         }| j	                         }|D cg c]  }|j                  |       }}t        t        |            D cg c]  }t        d|        }}t        t        t        |            |j                  d      }t        |      D 	ci c]  \  }}	|	|
 }
}}	t        t        |
            D cg c]  }|
|   	 }}|D cg c]  }||   	 }}| j                         } ||      }t         j                  j                  j                  |||g      \  }}}t        d      \  }}t        t!        | ||D cg c]
  } ||       c}                  }t#        t%        j&                  |      |      }|t)        |      fS c c}w c c}w c c}	}w c c}w c c}w c c}w )zC
        Manually get canonicalization of the output index
        rC  T)ra  r@  c)rn   r   r   r   r(  r   r   r   rg   rf  r(  r   r  r  rM   r   r   rj   r   r  r   )r  r   r  rJ  r   r   r  index_orderr   r   r   r   r  r   	new_sizesr   r  r   add_varreplacements                       r   canonicalizezExternKernel.canonicalizez  s   
 77##//#29:Q8%%a(::;@U;LMa(1QC1M
MU3w<0g6I6ISWX+4[+ABxsC#s(BB$)#f+$67q77-23jm3
3##%
#%&WW%5%5%E%Ew&
"	7F !%
73z7	3R1GAJ3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F5/F:>F?$G6G
+Gc                    |rt         nt        }t        j                  | |      }| j                  D ]  }| ||      z  } | j
                  j                         D ]  }| ||      z  } |S r   )maybe_free_unbacked_symbolsmaybe_free_symbolsry  r%  r  r   r   )r  r^  maybe_get_symbolsr  r  s        r   r%  z!ExternKernel.get_free_symbol_uses  s|     ,9'>P 	 --dMB%% 	(C"3''A	(;;%%' 	(C"3''A	(r   c           
     "   t        | dd       }d|g}|t        j                  |       D cg c]'  }|j                   dt        | |j                         ) c}z  }|j	                  d| j
                         | j                  |      S c c}w )Nr  zpython_kernel_name=r  r  )r   ri  fieldsr   r  rr  r  )r  r  r  rj  s       r   r  zExternKernel.__str__  s    d$8$?!+1
 	$++D1
 zzl!GD%**567
 	
 	|D$4$4#789u%%
s   ,Br   NNNNr   N)r   rI  rC  r  r_  r  r  r7  r   dict[str, Any] | Noner  r  r  rI  r  rI  r
  r	  r  r  r   r   r  r  r8  r   )r)  rs   r  rI  r   r   r)  rs   r   r   r  rI  r   r   )r  rI  r   r   rH  rU  )r   r   r   r   )r  r|   r   r   r   r   r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])r   r   r   rH  r'  )NNF)
r   r   r   Optional[Sequence[int]]r  rF  r  r   r   r   rZ  )r   r   r  rT  r  r   r   r   )r   r   r   r\  r  r   r   r   )r   r7  r   rs  r   r7  )r  ro  r   r   r   r   )r  r   r   r   r   r   )r  r   r   r   )r   z'tuple[list[Sequence[Expr]], list[Expr]])r   ztuple[Expr, Sequence[Expr]]re  )?r   r   r   rh  r  r   ri  rj  r   r   r  r  r  r   r
  r  r  r  r  r  r  r  r  r  r  rh  r-  r0  r  r  r'  rE  rk  rG  r  rq  ry  r  r~  r  r  r  r  r  r  r  r#  r  r  r  r  r  r  r  r  r  r  r  rY   r%  r  r  r  r  s   @r   r  r  "  s   
 $&M=%.[..tDFND-1K*1(,,%)O]) 4E;3D3D4!=  +/K'.59N293D;3D3D40  =A9@<MK<M<M=9  .?[->->t-T*T (*(,15,0)-79.2.. . :	.
 %. &. /. *. '. (5. ,. 
.@.#$J! KOM+M:GM	M"";0+$
 
 
 |
!|
*-|
9<|

|
 |
| C
 C
J !! !!F ! !  *.6:#aa 'a 4	a
 a 
a aF QV	
	
'9	
JN	
		
 	
 DIPP,P=AP	P P
 > > ? ?  " 
 
"!"+9"	"HXB4J4	
	=$'> N+$)!	! ,
& Hr   r  c                  f     e Zd ZddZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )ExternKernelOutc                &    |j                  |        y r   )generate_extern_kernel_outr/  s     r   r0  zExternKernelOut.codegen      **40r   c
                &   | j                  |      }
t        |
t              sJ t        |
             t        |   d ||
||xs i d ||||	
       t        j                  j                  |       | _	        t        j                  j                  |        y r   )r+  r   r   r   r@  r  rn   r   r-  r   r.  )r  rC  r_  r  r   r  r  r  r
  r  unwrapped_inputsrB  s              r   r  zExternKernelOut.__init__  s      ..v6*H5Mt<L7MM5Lb)	
 GG++D1		""4(r   c                     yr  r   r  s    r   r  zExternKernelOut.should_allocate  r  r   r  r  )rC  r  r_  r=  r  r7  r   Optional[dict[str, Any]]r  r  r  rI  r  rI  r
  r7  r  r  r   r   rC  )r   r   r   r0  r  r  r  r  s   @r   r  r    s    1 (*+/15,0)-79.2)) !) %	)
 )) /) *) ') (5) ,) 
):r   r  c                        e Zd Zd fdZ xZS )RandomSeedsc                   t        j                  t         j                        }t        |   t        |t         j                  |g      g |j                  |j                  |ggddt        j                  j                         y )Nr\  zaten.randint.low_outzat::_ops::randint_low_out::call)rC  r_  r  r  r  r  )r  r  r  r@  r  rE  r	  r  r2  randintlow_out)r  countr   limitsrB  s       r   r  zRandomSeeds.__init__  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
r   )r  r   r   rN  r   r   r   r   r   r  r  r  s   @r   r  r    s    
 
r   r  c                  h     e Zd ZddZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZd	dZ xZS )
r  c                &    |j                  |        y r   )generate_extern_kernel_allocr/  s     r   r0  zExternKernelAlloc.codegen  s    ,,T2r   c	                F   | j                  |      }	t        d |	D              sJ t        
|   d |t	        t
        t           |	      ||xs i d ||||
       g | _        t        j                  j                  |       | _        t        j                  j                  |        y )Nc              3  <   K   | ]  }t        |t                y wr   r  )r   r   s     r   r   z-ExternKernelAlloc.__init__.<locals>.<genexpr>  s     CQ:a(Cr  )r+  r   r@  r  r   r   r   rB  rn   r   r-  r   r.  )r  rC  r_  r  r   r  r  r
  r  r  rB  s             r   r  zExternKernelAlloc.__init__  s      ..v6C2BCCCC&!#34Lb)	
 ')GG++D1		""4(r   c                     yr  r   r  s    r   r  z!ExternKernelAlloc.should_allocate  r  r   c                    t         r   ru  r  s    r   r#  z"ExternKernelAlloc.apply_constraint  rv  r   r  )r   NNNr   N)rC  r  r_  r=  r  r7  r   r  r  rI  r  rI  r
  r7  r  r  r   r   rC  r8  )r   r   r   r0  r  r  r#  r  r  s   @r   r  r    s    3 (*+/,0)-79.2)) !) %	)
 )) *) ') (5) ,) 
)@"r   r  c                  T     e Zd ZdZ	 	 	 	 	 	 	 	 d fdZddZd	dZd
dZddZ xZ	S )rC  zP
    An output buffer that represents the mutation of a pre-existing buffer
    c                    t         |   d |       |j                         }t        j                  j                  |       |g| _        || _        t        j                  j                  |       | _	        y r  )
r@  r  rh  rn   r   ri  mutation_namesmutating_noder-  r   )r  rC  mutated_noder  mutated_node_namerB  s        r   r  zMutationOutput.__init__"  sb     	d62(113	##$5601(5GG++D1	r   c                    | j                   S r   )r  r  s    r   r  zMutationOutput.get_defining_op,  s    !!!r   c                    | j                   S r   )r  r  s    r   r1  z!MutationOutput.get_mutation_names/  rE  r   c                     yr  r   r  s    r   r  zMutationOutput.should_allocate2  r  r   c                ^    | j                         }d |D        D cg c]  }|| c}S c c}w )Nc              3  Z   K   | ]#  }t         j                  j                  |       % y wr   )rn   r   try_get_buffer)r   r   s     r   r   z6MutationOutput.get_mutation_buffers.<locals>.<genexpr>9  s     P..t4Ps   )+)r1  )r  r  rm  s      r   get_mutation_buffersz#MutationOutput.get_mutation_buffers5  s=    002 QP
 
 	
 
s   *)rC  r  r  r   r  rp  r   r   r  rg  rC  r   r=  )
r   r   r   rh  r  r  r1  r  r  r  r  s   @r   rC  rC    sA    2 2062GP2	2"#
r   rC  c                       e Zd ZU dZi Zded<   e	 	 	 	 	 	 d	d       Ze	 	 	 	 	 	 d	d       Z	 	 	 	 	 	 	 	 d
 fdZ	ddZ
ddZ xZS )TMADescriptorad  
    An IR node representing a generic host-side TMA descriptor in the Triton API
    Mostly useful for user-defined Triton kernels relying on host-side TMA;
    but can, in principle, be used for Inductor's Triton templates, too.

    See TMADescriptorExperimental and TMADescriptorStable for the two implementations
    (the old API and the new API)
    zdict[Any, TMADescriptor]_CACHEc                    t        |      dk(  sJ |d   dk(  rt        |g|d    S |d   dk(  sJ t        |g|d    S )Nr   r   experimentalrA   r}  )r   TMADescriptorExperimentalTMADescriptorStable)r  rI  tma_metas      r   _create_implzTMADescriptor._create_implL  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<r   c                    t        |      |f}|| j                  vr| j                  ||      | j                  |<   | j                  |   S r   )idr 	  r	  )r  rI  r	  ra  s       r   r  zTMADescriptor.createW  sF     &z8$cjj !..vx@CJJsOzz#r   c           
     @   t         |   d t        t        ||j	                                     t        t        t           |      t        |      d        || _	        t        j                  j                  |       | _        t        j                  j                  |        y )NrA  )r@  r  rE  rH  r  r   r   rg  r   rI  rn   r   r-  r   r.  )r  rI  r_  r  rB  s       r   r  zTMADescriptor.__init__`  s     	 !,,. &!6*- 	
  GG++D1		""4(r   c                &    |j                  |        y r   )generate_tma_descriptorr/  s     r   r0  zTMADescriptor.codegenw      ''-r   c                    | j                   S r   )rI  r  s    r   
get_tensorzTMADescriptor.get_tensorz  r  r   )rI  r   r	  ztuple[str, tuple[Any, ...]]r   r  )rI  r   r_  r7  r  r7  r   r   r  rY  )r   r   r   rh  r 	  r   r  r	  r  r  r0  r	  r  r  s   @r   r  r  >  s     (*F$)=='B=	= = 'B	 ))&3)DQ)	)..r   r  c                  :     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 d fdZ xZS )r	  z
    the new host-side TMA Descriptor API:
    (the ones obtained via create_{1d,2d}_tma_descriptor calls).

    See also TMADescriptorStable for the new API.
    c                b   t        |      dv sJ t        |      t        |      k(  sJ ||j                         j                  }|| _        || _        || _        t        | j                        | _        |g}g | j                  | j                  | j
                  }t        | !  |||       y )N)rA   r   rI  r_  r  )	r   r  r  r  
block_dimselement_sizer  r@  r  )r  rI  r  r	  r	  r_  r  rB  s          r   r  z"TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
r   r   )
rI  r   r  list[Union[int, torch.SymInt]]r	  r	  r	  rW  r   r   r   r   r   rh  r  r  r  s   @r   r	  r	  ~  sG     '+

 -
 3	

 $
 

 
r   r	  c                  $     e Zd ZdZd fdZ xZS )r	  z
    the new host-side TMA descriptor API
    (the ones obtained via TensorDescriptor.from_tensor).

    See also TMADescriptorExperimental for the old API.
    c                :    || _         t        | 	  ||g|       y )Nr	  )block_shaper@  r  )r  rI  r	  rB  s      r   r  zTMADescriptorStable.__init__  s(    &8% 	 	
r   )rI  r   r	  r	  r	  r  s   @r   r	  r	    s    
 
r   r	  c                  <     e Zd Z	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )SubgraphBufferc                   t         
|   d ||       || _        || _        t        j
                  j                  |       | _        t        j
                  j                  |        t        j
                  j                  | j                  ||      | _
        t        | j                        sJ t        | j                        }|D ]T  }|| j                  j                  |j                  <   | j                  j                  j!                  |j                         V |D cg c]  }|j                   c}| _        dd lmc m}	 t	        j*                  | j                        5  |	j-                  ddd      5   | j                  j.                  | j                    d d d        d d d        y c c}w # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r@  r  rY  example_inputsrn   r   r-  r   r.  make_subgraphsubgraphr  r_  rb  r  graph_input_namesr  
sym_inputstorch._inductor.configr  rB   set_graph_handlerr   run)r  rC  r   rY  r 	  subgraph_namer$	  sym_inpsym_varinductor_configrB  s             r   r  zSubgraphBuffer.__init__  sn    	v{3,GG++D1		""4(--dgg~}U,,,(5
! 	AG7>DMM&&w||4MM++227<<@	A 8BBG7<<B88  / 	8 &&""'+1 '  8
 "!!4#6#678	8 	8	 C8 8	8 	8s*   F* F;5$F/F;/F8	4F;;Gc                    G d d      }t        | j                        sJ | j                  D cg c]  }|j                          }}|j                   || j                        g | j
                  || j                  g       y c c}w )Nc                      e Zd ZddZy),SubgraphBuffer.codegen.<locals>.CodegenGraphc                4    || _         |j                  | _        y r   )r   r   )r  r   s     r   r  z5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__  s    "
!JJ	r   N)r   rt   )r   r   r   r  r   r   r   CodegenGraphr.	    s    'r   r0	  )r  r_  r  'codegen_subgraph_with_flattened_outputsr"	  r$	  r   )r  r)  r0	  r  outer_inputss        r   r0  zSubgraphBuffer.codegen  sz    	' 	'
  ,,,7;{{C!++-CC77'-doo--YYK	
 Ds   B)
rC  r  r   r  rY  torch.fx.GraphModuler 	  	list[Any]r(	  r   r  r   r   r   r  r0  r  r  s   @r   r	  r	    s>    "8"8 ""8 !	"8
 ""8 "8H
r   r	  c                       e Zd ZddZed	d       Z ed       	 d
	 	 	 d fd       ZddZ	 	 	 	 	 	 	 	 	 	 d fdZ	ddZ
ddZ xZS )UserDefinedTritonKernelc                D   ddl m} ddlm} |j	                  | j
                        g }g }g }t        |      rt        d      r%|j                  fdj                  D               n)t        d      sJ |j                  j                         t        d      r:j                  D ]*  }|j                  j                  j                  |          , n)t        d      sJ |j                  j                         j                   }j                  |||fS )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  P   K   | ]  }j                   j                  |     y wr   )r   	arg_names)r   r   r  s     r   r   zBUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>  s%      */0FII''**s   #&restore_value	reset_idxreset_to_zero)triton.runtime.autotunerr9	  *torch._higher_order_ops.triton_kernel_wrapr:	  
get_kernel
kernel_idxr   r  r  r;	  r>	  r?	  r  r   r=	  r@	  configs)r  r9	  r:	  rE	  restore_value_argsreset_to_zero_argsr   r  s          @r   get_kernel_and_metadataz/UserDefinedTritonKernel.get_kernel_and_metadata  s   6P"--doo>(*(*fi( v}-")) *4:4F4F*  v777"))&*>*>?v{+)) FA&--fii.A.A!.DEF v777"))&*>*>?nnGYYFw 24FFFr   c                   ddl m} | j                         \  }}}}|j                  ||| j                  ||| j
                        \  }}}	}
| j                  D ci c]  }|| j                  |       }}|j                  D cg c]  }|j                   c}|j                  D cg c]  }|j                  s|j                   }}t        fd|D              }g }g }g }g }t        j                  |j                         t!        t        j"                  d      |
            D ]q  \  }}||v r |       r|j%                  |       |j%                  |       t'        |t(              r?|j%                  |j+                                |j%                  |j-                                t'        |t.        t0        t2        t4        j6                  f      r,|j%                  |       |j%                  t9        |             ||v r(|j%                  d       |j%                  t.               |R	  |       r(|j%                  d       |j%                  t.               8|j;                          |j;                          Zt=        dt9        |       d|        | j?                  ||       |jA                  |||||||	d	| jC                         | jD                  j                  

       yc c}w c c}w c c}w )YOverrides the parent member.
        See https://github.com/pytorch/pytorch/issues/151692r   )triton_version_uses_attrs_dictc              3  (   K   | ]	  }|     y wr   r   )r   r   r=	  s     r   r   z2UserDefinedTritonKernel.codegen.<locals>.<genexpr>.  s     $FaYq\$Fs   r  r&  NzUnsupported arg type: r  T)	arg_typesraw_argsraw_keystriton_metainductor_metar  r   original_fxnode_name)#r  rK	  rH	  !define_user_defined_triton_kernelr   gridr
  r  paramsr   is_constexprnumr:   r  r  r  r   repeatr  r   r   r  r  r   r  r   r   r    r   r  r  r-  generate_kernel_callr  r  )r  r)  rK	  r  rE	  rF	  rG	  new_namerP	  rQ	  extra_launch_argsr  
named_argsr  
constexprsconstexpr_namesr   rM	  raw_keys_filteredraw_args_filteredr   r  r=	  s                         @r   r0  zUserDefinedTritonKernel.codegen  s   
 	I ((*	
 55KKII
	
 261S1S
,-At$$Q''

 
 &,]]3QVV3	%+]]EannaeeE
E$$F:$FF!	')')"I$4$4R$8:K L
 $	WID# &+I+K$$T*$$S)#v&C1134  1C#udEJJ!?@C   c+( B  % 23KKO$$S)%))+%))+),B49+RPSu*UVVI$	WL 	Wh/$$&&#'??$!%!2!2 	% 	
e
 4Es   K+K0'K59K5c                P    t         |   |      t        | j                  |      z  S r   )r@  r%  r(   rT	  rA  s     r   r%  z,UserDefinedTritonKernel.get_free_symbol_usesh  s-     w+M:=MII}>
 
 	
r   c                    t               S r   r9   r  s    r   r  z0UserDefinedTritonKernel.get_unbacked_symbol_defsr  r  r   c               B   g }i }g }|j                         D ]  \  }}	t        |	t              rXt        j	                  | j                  |	            }
||v rt        j                  |
||         }
|j                  |
       |
||<   n|j                  |	       |	||<    t        |      dk7  sJ |d   j                         | _        t        |t              sJ t        |             t        | =  d t!        | j                        |t#        |      |       || _        || _        | j)                         \  }}}}t+        |d      sJ |j,                  D cg c]	  }||v s| c}| _        ddlm} t        |      dkD  r|d   j4                  ni } ||i |||      D cg c]  }||   	 c}| _        | j6                  D cg c]#  }t9        t!        | j                        ||       % c}| _        t<        j>                  jA                  |        y c c}w c c}w c c}w )Nr   r  r=	  )identify_mutated_tensors)!r  r   r   ry  r  r  r  r  r  r   r  r   r   r   r@  r  r\  r   rD	  rT	  rH	  r  r=	  r
  rB	  rd	  r   mutable_argsrC  r  rn   r   r.  )r  rD	  rT	  tma_descriptor_metadatakernel_argsr_  r   r  r  r  r  r  rE	  r   r  rd	  autotuned_kwargsra  rm  rB  s                      r   r  z UserDefinedTritonKernel.__init__u  s!     "$&&(%%' 		DAq!Y' 99$:L:LQ:OP//%,,Q0G0JKAa q	$$Q'q			 6{aQi**,&(+9T&\9+dkk*- 	
 %	 $ < < >A v{+++!++.
sk/AC.
* 	X03Gq0@71:,,b 03;3"23'
 
 ((!
 :T[[93E!
 	
""4().

!
s   	HH H(Hc                ,    t        | j                        S r   )r   r  r  s    r   r  z#UserDefinedTritonKernel.get_outputs  s    D))**r   c                    | j                   S r   r  r  s    r   r  z"UserDefinedTritonKernel.get_device  r  r   )r   z(tuple[Kernel, Any, list[str], list[str]]r  rZ  re  r  )
rD	  r   rT	  r   rf	  rs  rg	  rs  r   r   r  rK  )r   r   r   rH	  r   r0  rY   r%  r  r  r  r  r  r  s   @r   r7	  r7	    s    G@ X
 X
t 56$)
!
	!
 7
=) =) 	=)
 "0=) $=) 
=)~+r   r7	  c                  T     e Zd ZdZddZddZd	dZd
dZ	 	 	 	 	 	 	 	 d fdZ xZ	S )InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    c                   t        d | j                  D              sJ d | j                  D        \  }t        j                  j                  r\|j                  | j                          d| ddj                  t        t        | j                               d|j                          y |j                  | j                          d| ddj                  t        t        | j                               d|j                          y )Nc              3  <   K   | ]  }t        |t                y wr   r  r   r  s     r   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     >Q:a(>r  c              3  X   K   | ]"  }t        t        |      j                          $ y wr   )r   r   r  rp	  s     r   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     IVQ113Is   (*r  r  z, NULL)r  )r   r_  rn   r   r4  rh  rE  r  r  reprr  ending)r  r)  r   s      r   r0  z InplaceBernoulliFallback.codegen  s    >$++>>>>IT[[I77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klr   c                     yr  r   r  s    r   r  z(InplaceBernoulliFallback.should_allocate  r  r   c                &    | j                  d      gS r  r  r  s    r   r1  z+InplaceBernoulliFallback.get_mutation_names      "##r   c                    t               S r   r9   r  s    r   r  z1InplaceBernoulliFallback.get_unbacked_symbol_defs  r  r   c                ^   t         |   d t        |j                               | j	                  |g      ||       t
        j                  j                  |j                                t
        j                  j                  |       | _
        t
        j                  j                  |        y )Nr  r  )r@  r  r\  r  r+  rn   r   ri  rh  r-  r   r.  )r  r  r   r  rB  s       r   r  z!InplaceBernoulliFallback.__init__  s     	alln-$# 	 	
 	
##AJJL1GG++D1		""4(r   r  rC  rg  r  )r  r|   r   r   r  r   r   r   
r   r   r   rh  r0  r  r1  r  r  r  r  s   @r   rl	  rl	    sF    $)'),2)DG)	) )r   rl	  c                  x     e Zd ZdZddZd	dZd
dZddZ	 	 	 	 	 	 	 	 d fdZe		 d	 	 	 	 	 	 	 dd       Z
 xZS )InplaceCopyFallbackrm	  c                R    | j                         \  }}}|j                  |||       y r   )r  codegen_device_copy)r  r)  rv  ru  non_blockings        r   r0  zInplaceCopyFallback.codegen  s)    #'#4#4#6 c<##Cl;r   c                     yr  r   r  s    r   r  z#InplaceCopyFallback.should_allocate  r  r   c                &    | j                  d      gS r  rv	  r  s    r   r1  z&InplaceCopyFallback.get_mutation_names  rw	  r   c                    t               S r   r9   r  s    r   r  z,InplaceCopyFallback.get_unbacked_symbol_defs  r  r   c                   t         |   d |||dd       t        j                  j	                  |d   j                                t        j                  j                  |       | _        t        j                  j                  |        y )Nz
aten.copy_aoti_torch_copy_)r  r  r   )	r@  r  rn   r   ri  rh  r-  r   r.  )r  rC  r_  r  rB  s       r   r  zInplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(r   c                    ||fD cg c]  }| j                  |       }}|f}t        t        |j                               ||      }|S c c}w r  )r  r}	  r\  r  )r  rv  ru  r	  r  r_  r  r  s           r   r  zInplaceCopyFallback.create  sX     25c
;1###A&;;%$cnn./

  <s   Ar  rC  rg  r  )rC  r  r_  r=  r  r7  r   r   rZ  )rv  r   ru  r   r	  r   r   r}	  )r   r   r   rh  r0  r  r1  r  r  r  r  r  r  s   @r   r}	  r}	    s~    <$)) !) %	)
 
)$ <A

%
59
	
 
r   r}	  c                  8    e Zd ZdZddZd	dZd
dZddZd	dZy)MutatingFirstArgExternKernelrm	  c                   t        | j                        sJ g d | j                  D        t        t        | j                        }|j                  | j                          ddj                  |       d|j                          y )Nc              3  <   K   | ]  }|j                           y wr   )r  rp	  s     r   r   z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>"  s     9a!!#9r  r  r  r  )	r  r_  r  rr	  r  rh  rE  r  rs	  )r  r)  argrefss      r   r0  z$MutatingFirstArgExternKernel.codegen  s~    ,,,
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
r   c                     yr  r   r  s    r   r  z,MutatingFirstArgExternKernel.should_allocate)  r  r   c                &    | j                  d      gS r  rv	  r  s    r   r1  z/MutatingFirstArgExternKernel.get_mutation_names,  rw	  r   c                    t               S r   r9   r  s    r   r  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defs/  r  r   c                     yr  r   r  s    r   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effects2  r  r   Nr  rC  rg  r  )	r   r   r   rh  r0  r  r1  r  r	  r   r   r   r	  r	    s     
$r   r	  c                        e Zd Zd fdZ xZS )ResizeStorageBytesc                   t        |t              sJ d       t        |   d t	        |j                               | j                  |g      |f       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j                  |        d| _        d| _        t        |t         t"        t$        f      sJ t'        |             t        j                  j(                  j+                  |j,                  j                                y )NzTODO: dynamic shapesr  )r  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r   r   r@  r  r\  r  r+  rn   r   ri  rh  r-  r   r.  r  r  rd  rf  r   r   never_reuse_buffersr  rB  )r  variabler  rB  s      r   r  zResizeStorageBytes.__init__7  s    (C(@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG(Xz9$EFVXVF	##''(>(>(@Ar   )r	  r   r  r   r   r   r  r  s   @r   r	  r	  6  s    B Br   r	  c                  (     e Zd Zd fdZddZ xZS )SetSourceTensorKernelc                   |j                          t        | 	  |j                         ||gdt        j
                  j                  j                  j                         t        |t        t        t        f      sJ t        |             t        j                  j                   j#                  |j$                  j'                                t        j                  j                   j#                  |j'                                t        j                  j                   j#                  | j'                                |j)                         }t+        t-        |      ||       t+        t-        |      ||       g| _        y )Nz!torch.ops.aten.set_.source_Tensor)r  r  r  )r	  r@  r  r  r  rl   r2  set_source_Tensorr   rd  rf  r   r   rn   r   r	  r  rB  rh  r  rC  r\  r  )r  self_tensorstorage_tensorr   rB  s       r   r  zSetSourceTensorKernel.__init__I  s   $$&%%'.)B		++99	 	 	
 +*i'HI 	
4L
 	
I 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4k4H:V4ndK!
r   c                F    | j                  d      | j                  d      gS r  rv	  r  s    r   r5  z2SetSourceTensorKernel.get_inputs_that_alias_output]  s    "DOOA$677r   )r	  r   r	  r   r   r   rg  )r   r   r   r  r5  r  r  s   @r   r	  r	  H  s    
(8r   r	  c                  j     e Zd ZdZd
dZddZddZddZddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZ	S )ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    c                &    |j                  |        y r   )generate_scatter_fallbackr/  s     r   r0  zScatterFallback.codegenh  s    ))$/r   c                     yr  r   r  s    r   r  zScatterFallback.should_allocatek  r  r   c                f    | j                   d   }t        |t              sJ |j                         gS r  r  r  ra  s     r   r1  z"ScatterFallback.get_mutation_namesn  s.    kk!n#v&&&r   c                    t               S r   r9   r  s    r   r  z(ScatterFallback.get_unbacked_symbol_defss  r  r   NTr  include_selfc          
     f   t        |t              | _        | j                  r%|||fD cg c]  }| j                  |       }	}|f}
n$||fD cg c]  }| j                  |       }	}||f}
t        |   d t        |j                               | j                  |	      |
||dt        |      ddg|       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j!                  |        y c c}w c c}w )Nr  r	  r  r	  )r  r
  r  )r   r   src_is_tensorr  r@  r  r\  r  r+  r   rn   r   ri  rh  r-  r   r.  )r  r  r   r8  r   ru  r  r	  r  tensorsr  rB  s              r   r  zScatterFallback.__init__v  s    (Y7 78%oFt))!,FGF FM78%jAt))!,AGA #JMalln-(|<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D)D.r  rC  r  r  )r  r|   r   r   r8  r   r   r   ru  r   r  rI  r	  r   r   r   r{	  r  s   @r   r	  r	  a  s|    0 
 !%!!)!!) !) 	!)
 !) !) !) !) 
!) !)r   r	  c                  \     e Zd ZdZddZddZd	dZd
dZ	 	 	 	 	 	 	 	 	 	 	 	 d fdZ xZ	S )IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    c                &    |j                  |        y r   )generate_index_put_fallbackr/  s     r   r0  zIndexPutFallback.codegen  s    ++D1r   c                     yr  r   r  s    r   r  z IndexPutFallback.should_allocate  r  r   c                &    | j                  d      gS r  rv	  r  s    r   r1  z#IndexPutFallback.get_mutation_names  rw	  r   c                    t               S r   r9   r  s    r   r  z)IndexPutFallback.get_unbacked_symbol_defs  r  r   c           	        || _         |D cg c]  }||	 }}||g|D cg c]  }| j                  |       }}d}	t        
|   d t	        j                               | j                  |      |fd|	|       t        j                  j                  | j                  d             t        j                  j                  |       | _        t        j                  j                  |        y c c}w c c}w )Naoti_torch_index_put_outr  zaten.index_put_)r  r  r  r   )rk  r  r@  r  r\  r  r+  rn   r   ri  r  r-  r   r.  )r  r  r   rk  r   
accumulater   valid_indicesr	  r  rB  s             r   r  zIndexPutFallback.__init__  s     $+=qq}==34f2M}2MNQ4%%a(NN4alln-(M0+# 	 	
 	
##DOOA$67GG++D1		""4( >Ns   C/C/C4r  rC  rg  r  )r  torch._ops.OpOverloadr   r   rk  r4	  r   r7  r	  r   r   r   r{	  r  s   @r   r	  r	    s`    2$)*) ) 	)
 ) ) 
) )r   r	  c                  &    e Zd Zedd       ZddZy)
DeviceCopyc           	     Z   |j                         }|J |j                         st        |      t        j                  j
                  vrt        d |j                         D              rt        j                  j                  sit        j                  j                  r>t        j                  j                  |       t        j                  j                  |       |j                  |      S t        j                  j                  |       t        j                  j                  |       t        d       |f}t        j!                  |      }d }|j#                         r|j%                         }t'        |j(                        xr |j(                  dk(  xr |}|j(                  dk(  xr t'        |j(                        xr |}|r t+        |      rd|j-                         _        t1        t3        ||j5                         |j#                         ||      | j7                  |      g|      S )Nc              3  T   K   | ]   }|t         j                  j                  v  " y wr   )rn   r   rW  r  s     r   r   z$DeviceCopy.create.<locals>.<genexpr>  s     GqA***Gs   &(zDeviceCopy in input programr  TrB  )r  r+  ri  rn   r   r  r   r  rB   aot_inductoruse_runtime_constant_foldingr4  add_device_infor/  r]   r  r  r   r(  re   r   r  r  rG  r	  rE  r  r  )	r  r   r   r	  x_devicer  r   is_destination_pinnedis_source_pinneds	            r   r  zDeviceCopy.create  s   <<>###Qqww'>'>>GA4D4D4FGG''DDww"" ''/''1''//	'	)78%++A.::<\\^F8==!KfkkU&:K| 	 MMU"Kvfkk':K| 	  5a 8'+ALLN$

/ q!"

 
	
r   c                   | j                         }t        |      dk(  sJ | j                  r2|j                  |d   | j                  j	                         |d          y |j                  |d   | j	                         |d          y )Nr   r   rA   )r  r   r  r	  r  )r  r)  r   s      r   r0  zDeviceCopy.codegen  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Sr   N)r   r   r   rN  r	  r   r   r   r  )r   r   r   r  r  r0  r   r   r   r	  r	    s    -
 -
^Tr   r	  c                       e Zd ZdZddZd	dZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
 fdZddZ ed       	 d	 	 	 dd       Z	ddZ
 xZS )DynamicSelectStorageOffseta  
    The result of computing a dynamic selection index is determined as follows: when the index in the
    select operation is unbacked, the actual index calculation is ambiguous for negative indices
    (index + size) versus non-negative indices (just index). To resolve this, we allocate an unbacked
    SymInt to represent the storage offset and decompose the select operation into a call to as_strided,
    computing the storage offset at runtime with this node.
    c                    t               S r   r9   r  s    r   r  z$DynamicSelectStorageOffset.get_reads  r  r   c                     yr  r   r  s    r   r  z*DynamicSelectStorageOffset.should_allocate  r  r   c                    t         |   d t        t        j                  d            g        || _        || _        || _        || _        || _	        || _
        y Nr  r  )r@  r  r\  r  r   unbacked_offset_symbolr   base_offsetbase_dim_strider   r  )r  r	  r   r	  r	  r   r  rB  s          r   r  z#DynamicSelectStorageOffset.__init__  sS     	ze1DErJ '=#
&.	
r   c                .    t        | j                  g      S r   )r:   r	  r  s    r   r  z3DynamicSelectStorageOffset.get_unbacked_symbol_defs$  s    466788r   c                .    t        | j                  |      S r   )r(   r   r$  s     r   r%  z/DynamicSelectStorageOffset.get_free_symbol_uses'  s      

M::r   c                >    |j                  | | j                         y )Nr  )codegen_dynamic_select_indexr  r/  s     r   r0  z"DynamicSelectStorageOffset.codegen-  s    ,,T,Dr   rb  rC  )r	  sympy.Symbolr   r	  r	  Union[sympy.Symbol, int]r	  r	  r   r	  r  r   r   r   r  rZ  re  r  r   r   r   rh  r  r  r  r  rY   r%  r0  r  r  s   @r   r	  r	    s     ,  .	
 2 '  
&9 89$);!;	!; :;
Er   r	  c                       e Zd ZdZddZd	dZ	 	 	 	 	 	 	 	 	 	 d
 fdZddZ ed       	 d	 	 	 dd       Z	ddZ
 xZS )DynamicSliceSizeac  
    Computes the output size of a slice call, handling the correct semantics in codegen.
    We do this for flexible handling for unbacked indices (to not data-dependent error).

    Slicing has 4 semantics for indices, i.e. x[start:] could be:
    1) start < -x.size(0)            -> x[0:]                    # negative out-of-bounds
    2) start in [-x.size(0), 0)      -> x[x.size(0) + start:]    # negative slicing
    3) start in [0, x.size(0))       -> x[start:]                # standard slicing
    4) start >= x.size(0)            -> empty slice              # positive out-of-bounds

    If the appropriate semantics are known beforehand, the output size is computed based on
    the start & end indices. If not (with unbacked indices), a new unbacked symbol is created
    to represent the output size, and codegen handles computing the correct case.
    c                    t               S r   r9   r  s    r   r  zDynamicSliceSize.get_readsA  r  r   c                     yr  r   r  s    r   r  z DynamicSliceSize.should_allocateD  r  r   c                    t         |   d t        t        j                  d            g        || _        || _        || _        || _        || _	        y r	  )
r@  r  r\  r  r   unbacked_size_symbolr  r  r  r   )r  r	  r  r  r  r   rB  s         r   r  zDynamicSliceSize.__init__G  sH     	ze1DErJ$8!
		r   c                .    t        | j                  g      S r   )r:   r	  r  s    r   r  z)DynamicSliceSize.get_unbacked_symbol_defsW  s    444566r   c                t    t        | j                  |      j                  t        | j                  |            S r   )r(   r  r  r  r$  s     r   r%  z%DynamicSliceSize.get_free_symbol_usesZ  s0      

M:@@TXX}5
 	
r   c                &    |j                  |        y r   )codegen_dynamic_slice_sizer/  s     r   r0  zDynamicSliceSize.codegenb  r  r   rb  rC  )
r	  r	  r  r	  r  r	  r  r	  r   r	  r  rZ  re  r  r	  r  s   @r   r	  r	  1  s~    * ( &	
 ' ' 7 ./$)
!
	!
 0
1r   r	  c                  T     e Zd ZdZddZddZ	 	 	 	 	 	 	 	 d	 fdZd
dZddZ xZ	S )r   z;
    The result of a call to aten._local_scalar_dense.
    c                    t               S r   r9   r  s    r   r  zDynamicScalar.get_readsk  r  r   c                     yr  r   r  s    r   r  zDynamicScalar.should_allocaten  r  r   c                    |j                          t        | 	  d t        t	        j
                  d            | j                  |g             || _        || _        y r	  )	r  r@  r  r\  r  r   r+  symkeypath)r  r	  r	  rB  rB  s       r   r  zDynamicScalar.__init__q  sM     	*ELL$78$:M:Mtf:U	
 r   c                .    t        | j                  g      S r   )r:   r	  r  s    r   r  z&DynamicScalar.get_unbacked_symbol_defs{  s    488*%%r   c                &    |j                  |        y r   )codegen_dynamic_scalarr/  s     r   r0  zDynamicScalar.codegen~  s    &&t,r   rb  rC  )r	  r	  r	  zpytree.KeyPathrB  r   r   r   r  r  )
r   r   r   rh  r  r  r  r  r0  r  r  s   @r   r   r   f  sA    *8@F	&-r   r   c                  l     e Zd ZdZddZd	dZd
 fdZd	dZ ed       	 d	 	 	 dd       Z	ddZ
 xZS )r   z5
    The result of a call to aten._assert_scalar
    c                    t               S r   r9   r  s    r   r  zAssertScalar.get_reads  r  r   c                     yr  r   r  s    r   r  zAssertScalar.should_allocate  r  r   c                ~    t         |   d t        t        j                  d            g        || _        || _        y r	  )r@  r  r\  r  r   scalarrp  )r  r	  rp  rB  s      r   r  zAssertScalar.__init__  s7    ell512	
 r   c                     yr  r   r  s    r   r	  zAssertScalar.has_side_effects  r  r   c                .    t        | j                  |      S r   )r(   r	  r$  s     r   r%  z!AssertScalar.get_free_symbol_uses  s      ];;r   c           	        t         j                  sy t        t        | j	                  d                  }t
        j                  j                  ry t
        j                  j                  rad| d}t
        j                  j                  j                  | j                  d      }|j                  d| d| j                   d| d	       y t
        j                  j                  j                  | j                  d      }|j                  d
| d       |j                  dt        | j                         d       |j                  | j!                          d       y )NFr]  zstd::to_string(r  )r  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)rB   scalar_assertsr  r  r%  rn   r   rA  r4  rf  codegen_cpp_sizevarr	  rh  rp  codegen_python_sizevarrr	  rh  )r  r)  symbol
symbol_strsizevars        r   r0  zAssertScalar.codegen  s4   $$ d44454IJK77WW  *6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:r   rb  rC  )r	  rp   rp  r   r   r   rZ  re  r  )r   r   r   rh  r  r  r  r	  rY   r%  r0  r  r  s   @r   r   r     sM    	 N+$)<!<	!< ,<
;r   r   c                  "    e Zd ZU ded<   ded<   y)ExternKernelNoder   r   zexport_schema.Noder   Nr   r   r   r   r	  r	    s    
I
r   r	  c                       e Zd ZdZ	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd fdZddZddZddZe		 	 	 	 	 	 dd	       Z
dd
ZddZddZd Zedd       Ze	dd       Zedd       Z xZS )FallbackKernelz
    A class that represents a fallback kernel for handling operators that are not
    directly support by inductor. It currently supports functional ops, view ops,
    inplace aten ops, and mutating ops that are auto-functionalizable.
    Nr  c                   t            |t        |      t        |      |       d _        |xs i  _        t        |t        j                  j                  t        j                  j                  f      sJ d| dt        |       d       | _        | _        |i n| _         j                  J t        j                   j#                   j                         g  _        g  _        t         j                  t        j                  j                        ry d j                  j)                         v ry  j                  j*                  }t        j,                  j.                  j1                   j                        r- j&                  j3                  |d   j5                                y dd}	|j6                  r?t9         j                        s* |	 j                        st;        d	 j                          j                   j<                   j>                        \  }
}d fd
}t        j,                  j.                  jA                  ||
|      D ]  \  }} |||        y )Nrz	  F#Fails to create FallbackKernel for r   not supported_c10d_functionalr   c                "   t         j                  j                  | j                         t         j                  j                  j
                        xs> t        | d      xr0 t         j                  j                  j
                  | j                  v S )N
py_kernels)r  _C%_dispatch_has_kernel_for_dispatch_keyr   DispatchKeyFunctionalizer  r	  rR  s    r   has_functionalize_implz7FallbackKernel.__init__.<locals>.has_functionalize_impl  sg    88AA	588//==  L) HHH((66"--G	r   z'NYI: Can't generate FallbackKernel for c                    t         j                  t        j                        r&t        |t        t
        f      sJ t        |             t        j                   j                        rt        |t
        t        f      rJ |y  j                  y d fd}t        j                   j                        r||D ]
  } ||        y y t        j                   j                        sJ  ||       y )Nc                   j                   j                  | j                                j                  J j                  j                  r?j
                  j                  t        t        | j                               |              y y r  )	alias_namesr  rh  
alias_infois_writer  rC  r\  r  )r  infor  s    r   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias1  sj      ''

5222??++))00&z'H!TR ,r   )r  r   r   r   )
r   r   r  ListTyper   r   library_utilsis_tensor_like_typer
  is_tensorlist_like_type)r
  r  r	
  optional_tensor_argr  s   `   r   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation#  s    $))U^^4!#e}5@tCy@500; &cE4=999{& 44TYY??/2 7+!"567 # %88CCC#r   )rS  r	  r   r   )r
  ztorch._C.Argumentr  r   r   r   )!r@  r  r   use_runtime_dispatchr  r   r  r  r  r<  r   r  rS  r   r  rn   r   warn_fallbackr
  r  r   r  _libraryrK  mutates_and_returns_first_argr  rh  
is_mutabler%   r  r_  r  
zip_schema)r  rC  r  rf  nontensor_argsrS  r   r  schemar
  r   r
  r
  r  rB  s   `             r   r  zFallbackKernel.__init__  s%    	+.!	 	 	
 %*!!2!8bUZZ**EJJ,J,JK
 	X04<.W	X 
 ","Nb&&222	d556 '))+d&&

(F(FG !1!1!6!6!88
 !!)) >>==d>N>NO&&{1~'>'>'@A	 *4+;+;<*4+;+;<%9$:J:J9KL  **4;;8J8JKf	> --88vN 	4ID#(s3	4r   c                ,   t         |          }| j                  t        j                  j
                  j                  u rT| j                  D ]E  }t        |t              s|j                  t        j                  |j                                     }G |S r   )r@  r  r  r  _prims	rng_primsgraphsafe_run_with_rng_stater  r   rU  	with_readrC   r  rh  )r  rj  r  rB  s      r   r  zFallbackKernel.get_read_writesE  sy    g-/u||55RRR)) c>2"-"7"7$,,S\\^<#K r   c           	     n    |j                  | j                         | j                  t        | dd             S Nr  )(codegen_unbacked_symbol_defs_for_outputsrh  rB  r   r/  s     r   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defsQ  s0    ??MMOT\\749Ld+S
 	
r   c                    t        | dd       x}rKt        t        j                  j                  j
                  |      }|J t        |j                               S t               S r
  r   r5   rn   r   r   r   r:   r}  r  r  resolveds      r   r  z'FallbackKernel.get_unbacked_symbol_defsV  _     '.A4 HHH0  **,=H '''hmmo..<r   c                Z   t         j                   G d d             }t        | j                        sJ | j                  D cg c]  } ||j	                                }}| j                  || j                        \  }}t        j                  j                  rt        | j                  t        j                  j                        r| j                  ||      }t!        | j                  j"                  j$                  |      D cg c]9  \  }}t        j                  j&                  j)                  ||j*                        ; }}}n6|D cg c]+  }t        j                  j&                  j)                  |      - }}| j,                  j/                  |       |S c c}w c c}}w c c}w )Nc                       e Zd ZU ded<   ddZy))FallbackKernel.codegen_args.<locals>.Shimr   refc                    | j                   S r   )r)
  r  s    r   r  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__e  s    xxr   NrU  )r   r   r   r   r  r   r   r   Shimr(
  a  s    H r   r+
  )ri  	dataclassr  r_  r  rS  r  rn   r   r4  r   r  r  r  r  r  r   r  r  rf  r  r   r   rH  )r  r+
  r   rf  r   r   params          r   r  zFallbackKernel.codegen_args`  sY   				  	  
	   ,,,<@KKHqtA//12HH**;8J8JKf77:d.>.>

@U@U#V..tV<D !$D$4$4$<$<$F$F ME1 $$33AuGD 
 EIIqAGG((77:IDI 	6" I
 Js   F>F"0F(c                F   | r!| D cg c]  }t        |t              r| c}nd }|r7| sJ | D cg c]#  }|j                         s|j                         % }}|d   S t        |t        j                        r|j
                  S t        |t        t        f      rxt        d |D              }|D cg c]  }|s|	 }}t        |      dk(  r|d   S |D ]7  }t        |t        j
                        sJ t        |j                        s5|c S  |d   S y c c}w c c}w c c}w )Nr   c              3  H   K   | ]  }t         j                  d |        y wr   )r	  find_devicer{  s     r   r   z-FallbackKernel.find_device.<locals>.<genexpr>  s'      $  **43$r  rA   )r   r  r  r  r`  r   r   r   r:   r   re   r   )rf  rl  r  non_torch_bind_tensor_argsr  devices
device_setr   s           r   r0
  zFallbackKernel.find_devicex  s%     $J1:a+IQJ 	#
 &;3>SC#..BRs~~'SGS1:nell3!(((ntUm4# $ ($ J -7A&&vAGA7|q qz!! "!&%,,777&++&!M" 1:3 K T Bs!   DDDD1D9Dc                2    ddl m}  || j                        S )Nr   )	is_impure)torch._library.utilsr5
  r  )r  r5
  s     r   r	  zFallbackKernel.has_side_effects  s    2 ))**r   c                   t        | j                  t        j                  j                  t        j                  j
                  f      s+J d| j                   dt        | j                         d       t        | j                  t        j                  j
                        sSd| j                  j                         vr7| j                  j                  j                  rt        | j                        rg S | j                  S )Nr	  r  r	  r	  )r   r  r  r  r  r<  r   r   r  r
  r%   r
  r  s    r   r5  z+FallbackKernel.get_inputs_that_alias_output  s    uzz44ejj6T6TU
 	
 2$2B2B1C2D$$%&n6	
 
 4++UZZ-K-KL"$*:*:*?*?*AA  ((33&t'7'78I###r   c                N    t        | j                        dk  sJ | j                  S r  )r   r  r  s    r   r1  z!FallbackKernel.get_mutation_names  s'    4&&'1,,,"""r   c           	        t         j                  d| j                         | j                         t	        | t
              sJ t        |              | j                  | j                  | j                        \  }}| j                  ||      }| j                  D cg c]  } | j                  |fi | }}| j                  }t        j                  j                  sg ||S t!        dg       }|j#                  |||      }	 	 	 	 	 	 dd}t	        |t$        j&                  j(                  j*                        r#|j-                  |d   |d         j.                  }	n|j0                  j.                  }	t3        |	      dk(  r>| j4                  r| j4                  n| j6                  }
|	d   j8                  } |||
      g}n9t;        |	| j4                        D cg c]  \  }} ||j8                  |       }}}| j                  J t=        | j                         t?        j@                  | j                  jC                         ||i             }t        jD                  jG                  |       g ||S c c}w c c}}w )	a  
        ProxyExecutor Design Note
        We export the ExternFallbackNodes (for custom ops) into a serialized file
        and run it with a host side proxy executor to address the ABI problem
        This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
        Detailed design doc can be found at
        https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
        z4Extern kernel node added for node %s with target %s.Nc           	        t        | t        j                  t        j                  f      r|}t        |t        t
        f      rt        |      dk(  sJ |d   }t        | t        j                        rTt        |t              sJ t        j                  j                  t        j                  |j                                     S |J t        j                  j                  d      S t        | t        j                        rt        | j                         t        j                        rpt        |t              sJ t!        |             t        j                  j                  |D cg c]&  }t        j                  |j                               ( c}      S t        | t        j"                        rt        | j                         t        j                        r|>t        j                  j                  t        j$                  j                  d            S t        |t              sJ t        j                  j                  t        j$                  j                  t        j                  |j                                           S t        | t        j&                        r t        j                  j                  |	      S t)        d
t!        |              c c}w )NrA   r   r  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )r   r  
TensorTypeNoneTyper   r   r   r   export_schemarq   r  TensorArgumentrh  r

  getElementTyper   r   OptionalTypeOptionalTensorArgumentIntTypeRuntimeError)return_typerQ  r   s      r   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output  sh    +(8(8%..'IJftUm4v;!+++ )Ck5+;+;<%c6222(1188"/">">CLLN"S 9   ;&;(11888FFK8Z**,e.>.>> "&(3AT&\A3$--44 $*  &44#,,.I  5   K););<**,e.>.>B >(1188+8+O+O+V+V$( ,W , 9   &ff555(1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K7$--44F4CC"%=d;>O=P#QRR7 s   )+K!r   rA   )r  r_  rB  r  )r   r   )rI
  z6Union[torch.TensorType, torch.ListType, torch.JitType]rQ  Union[IRNode, Sequence[IRNode]]r   zexport_schema.Argument)$ry  r  rh  r  r   r	  r   rS  r_  r  r  r
  r  rn   r   aot_moder$   serialize_inputsr  r]  	torchbindCallTorchBindr
  returnsr  r   rB  r  r   r   r	  rB
  r8   r   extern_kernel_nodesr  )r  r   r   ra  ordered_kwargsr  
serializernamed_argumentsrJ
  rP
  rB  rI
  output_argumentsreturn_schemarQ  r   s                   r   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_node  sj    			BMMO	
 $/;d;/**4;;8J8JKf**48 99
 "D!!#00
 
 !!ww+T+N++*44
$55fdFK3	SO3	S33	S $3	Sj fe55??MMNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[' JK .1$,,-G 
 *M6	 %!++    +++##'',,.&(	
 	
$$T*''''K
` s   I3I8c                "     j                   }|J |j                  dk(  rt        |t        j                  j
                        sJ t        |             t        j                  j                  rddl
m} t        |      |vrt        j                  d|       d _        n~|j                  dk(  r4t        |t        j                  j
                        sKJ t        |             t        j                  j                  r!|t         j"                  j$                  v _        t        j                  j                  rt        |t        j                  j
                        r j                  sdfd j'                   j(                   j*                        \  }t-        j.                  | fd	 j0                  D              }t3        fd
t5        ||j6                  j8                        D               _         j;                  |        j                  r j=                         } j>                  J  j                   J |jA                   jC                          j>                   fd j                   | jD                  r jD                  n jF                         n^|jI                          t         jJ                  tL              r3 jO                  |        jQ                  |        jS                  |        jU                  |       y)rJ	  Nr2  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                    t        | t        j                        r | j                               S t        | t        j                        S r   )r   r  rE
  rD
  
NumberType)r  	is_numbers    r   r]
  z)FallbackKernel.codegen.<locals>.is_numberX   s:    a!3!34$Q%5%5%788!!U%5%566r   c              3  D   K   | ]  } j                   |fi   y wr   )r  )r   r  r   r  s     r   r   z)FallbackKernel.codegen.<locals>.<genexpr>e   s+       *D))!6v6s    c              3  l   K   | ]+  \  }}t        |t              xr  |j                         - y wr   )r   complexr   )r   r  r  r]
  s      r   r   z)FallbackKernel.codegen.<locals>.<genexpr>j   s5      ,Aq 1g&A9Q[[+AA,s   14c                 H    g  j                          j                         S r   )r  r  r  s   r   r_  z(FallbackKernel.codegen.<locals>.<lambda>x   s$    F$++-F0C0C0EF r   )r  ztorch.JitTyper   r   )+r  r5  r   r  r  r  r   rn   r   r4  torchgen.aoti.fallback_opsrY
  r   ry  r  r
  rB   r	  custom_ops_to_c_shimsrS  r_  r  r  r  r
  r  r   r  r  r-  rW
  r  ,generate_fallback_kernel_with_runtime_lookuprh  rB  r  generate_fallback_kernelrC  r  r  r  r  r 
  )	r  r)  r  rY
  r   	args_iterexported_argsr]
  r   s	   `      @@r   r0  zFallbackKernel.codegen1   so    !!!!!v%fejj&;&;<Jd6lJ<ww""Lv;&;; KKa 15D--fejj&;&;<Jd6lJ<WW   f11GGG % GG65::#8#89--7  ..t{{D<N<NOLD& "!??I ), ,	6>>+C+CD, )D%
 	W%$$ ::<M**666##///@@''F   $$2G2G ,,T2$++v.))'2..w7,,W5))'2r   c           	         d}	 | j                         }t        | j                  | j                  t        | j                               t        | j                               |      S # t        $ r Y ]w xY w)NFrB  )rG  rH
  rE  r   r   r[   r   r   )rQ  rG  s     r   tensor_to_layoutzFallbackKernel.tensor_to_layout   sj    		((*I MMLL%fkkm4%fmmo6
 	
  		s   A& &	A21A2c           	     \    t         j                  f}||vr,t        t        d   t        j
                  j                        }n
t               }|5    j                  |g|i |\  }}}}	}
ddd       t        d D               j                  |      }|sit        |t        j                  j                  j                        s&|t        j                   j"                  j$                  u rt        j&                  d      }|  t)        |      ||	|
      n"|sJ d         t+        |      ||	|
      d fd |g       }t        |t,        t.        f      r	|_        |S t        |t2              rt/        |      _        |S |g_        |S # 1 sw Y   4xY w)	z9Create an instance of FallbackKernel from an _OpOverloadsNc              3  2   K   | ]  }t        |        y wr   )r  r  s     r   r   z(FallbackKernel.create.<locals>.<genexpr>   s     !K,s"3!Kr|  r  r  r   r  z"Not sure where to find device infoc                    t         t        t        f      r. t                fdt	        t                     D              S t         t              r: j                         D ci c]  \  }}| |t               |fgz           c}}S t         t        j                        rnt        j                               }t        j                  sst               s3t        j                   j"                  j%                  |j&                         |S t         t(              r S t         t        j*                        r j,                  j.                  S  J dt                d       y c c}}w )Nc              3  T   K   | ]  } |   t              |fgz          ! y wr   )r   )r   r   generate_outputrk  rQ  s     r   r   zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>   s5      $ $F1Iw4<:K9L/LM$r>  zFallbackKernel output type z is not supported)r   r   r   r   r   r   r   r  r  r`  MultiOutputri
  rB    assume_unaligned_fallback_outputrk   rn   r   r  r  r   r   r  r   r,  )	rQ  rk  ra  r  rm  r  ro
  has_unaligned_inputpackeds	   ``   r   ro
  z.FallbackKernel.create.<locals>.generate_output   sH   &4-0#tF| $"3v;/$   FD) %+LLN S g$v,9L8M.MNN  FELL1!((0 ;;*,V4GG--11#((;
FC(FELL1{{'''~ 1$v,?PQ~ 3s   +#E3)rQ  r   rk  zlist[tuple[Any, int]]r   r   )r2  *_fused_moving_avg_obs_fq_helper_functionalr   r	   rn   r   r  r
   rq  r  r0
  r   r  r]  rN
  rO
  rl   higher_orderprintr   r\  r  r   r   rB  r   )r  r  r   r   fake_incorrect_kernelscontextrl  rf  rg  rS  r  r   rB  ro
  rr
  rs
  s   `            @@@r   r  zFallbackKernel.create   s    #'"Q"Q!S//1$79J9JKG!mG 	< #""6;D;F;!	< "!K{!KKn= vu66@@NNO//555\\%(F!&)"3F ???6!0"3F 	  	D "."5ge}-$FN  &"7^FN  &YFNo	< 	<s   F!!F+r   rC  r  r  r|   rf  r=  r
  r7  rS  r  r   r  r  ,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   r`  r  r  r  )rf  z Optional[Sequence[torch.Tensor]]rl  r7  r   r   rC  rg  )rQ  r  r   rE  )r  r|   r   r   r   r   r   r	  )r   r   r   rh  r  r  r 
  r  r  rk  r0
  r	  r5  r1  rW
  r   r0  ri
  r  r  r  r  s   @r   r	  r	    s    ,0u4 KOu4u4 u4 &	u4
 &u4 +u4 )u4 Hu4 
u4n


 0 5GT	 >+$*#w(r S3 S3j 
 
 _ _r   r	  c                  Z     e Zd ZdZddZddZddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZ xZS )
ComplexViewz9View a complex number as two dtyped numbers or vice versac                     yr  r   r  s    r   r  zComplexView.should_allocate   r  r   c                &    | j                  d      gS r  rv	  r  s    r   r5  z(ComplexView.get_inputs_that_alias_output!  s    "##r   Nrl
  c          	     2    t         |   |||||||       y )Nrl
  )r@  r  	r  rC  r  rf  r
  rS  r   r  rB  s	           r   r  zComplexView.__init__!  s,     	/ 	 	
r   rC  rg  )rC  r  r  r|   rf  r=  r
  r7  rS  r  r   r  r  rz
  r   r   )r   r   r   rh  r  r5  r  r  r  s   @r   r|
  r|
     sq    C$ )-JN

 
 &	

 &
 +
 &
 H
 

 
r   r|
  c                      e Zd ZdZddZy)MemoryCheckKernelz
    Custom kernel for memory checking that generates direct function calls

    TODO - the custom op was erroring with str inputs. should be able to custom op directly.
    c                    |j                          | j                  \  }}}t        |      }t        |      }|r|j                  d       d| d| d| d}n	d| d| d}|j                  |       y)z.Override codegen to write direct function callzV# note: dont currently distinguish between buffers returned and dealloc'd in last stepzcheck_memory_step(allocated=z, freed=z, is_final_step=r  N)r  r  rr	  rh  )r  r)  
alive_list	dead_listis_final_step
alive_repr	dead_reprcalls           r   r0  zMemoryCheckKernel.codegen"!  s     	224/3/A/A,
I}*%
O	h 2*Xi[P`an`oopqD1*Xi[PQRD$r   Nr  )r   r   r   rh  r0  r   r   r   r
  r
  !  s     r   r
  c                       e Zd ZU ded<   ddZy)r  rN  r   c                    | j                   S r   r  r  s    r   r  zMultiOutputLayout.get_device8!  r  r   NrK  )r   r   r   r   r  r   r   r   r  r  4!  s    r   r  c                  v     e Zd ZddZ	 d	 	 	 	 	 	 	 	 	 d fdZ ed       	 d	 	 	 d	d       Zd
dZddZ xZ	S )rp
  c                    |j                  |        | j                  s#| j                  |       | j                  |       y y r   )codegen_multi_output!skip_size_stride_alignment_checksr  r  r/  s     r   r0  zMultiOutput.codegen=!  s:    $$T*55%%g.**73 6r   c                    t         |   d ||gd       t        j                  j	                  |       | _        t        j                  j                  |        || _        || _        y r  )	r@  r  rn   r   r-  r   r.  rk  r
  )r  rC  r  rk  r
  rB  s        r   r  zMultiOutput.__init__C!  sQ     	vw3GG++D1		""4(1R.r   c                p    | j                   d   }t        |t              sJ |       |j                  |      S r  )r_  r   r   r%  )r  r^  r  s      r   r%  z MultiOutput.get_free_symbol_usesP!  s7     [[^
*f-9z9-..}==r   c                p    t        | j                        dk(  xr t        | j                  d   t              S )NrA   r   )r   r_  r   r  r  s    r   r  zMultiOutput.should_allocateX!  s0    4;;1$ 
t{{1~'89	
r   c                    | j                   D cg c]>  }t        |t              r,t        |j	                               dkD  r|j                         @ c}S c c}w r  )r_  r   r	  r   r5  rh  r	  s     r   r5  z(MultiOutput.get_inputs_that_alias_output]!  sN     {{
#~.C4467!; LLN
 	
 
s   AAr  rZ  )
rC  r  r  r   rk  zlist[tuple[Any, ...]]r
  r   r   r   re  rC  rg  )
r   r   r   r0  r  rY   r%  r  r5  r  r  s   @r   rp
  rp
  <!  s~    4 38SS S '	S
 ,0S 
S M*$)>!>	!> +>


r   rp
  c                     e Zd ZU dZded<   d,dZd-dZd.dZd/dZd0dZ	d1d	Z
d2d3dZd4dZd5dZd6dZd,dZd5dZ	 d7	 	 	 	 	 d8dZd9dZd:dZ	 d7	 	 	 	 	 d;dZd<dZd=dZd>dZd?dZd@dZdAdZd,dZd,dZdBdZdCdZd1dZdCd Z d@d!Z! e"d       	 d7	 	 	 dDd"       Z#dEd#Z$dFd$Z%d2dGd%Z&e'dHd&       Z(dId'Z)dHd(Z*dAd)Z+e'dJd*       Z,d1d+Z-e-Z.y
)Krp  zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    r   rB  c                6    | j                   j                         S r   r  r  s    r   r  z!MutableBox.has_exceeded_max_readsp!  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_devices!  r  r   c                6    | j                   j                         S r   rv  r  s    r   r  zMutableBox.make_loaderv!      yy$$&&r   c                6    | j                   j                         S r   )rB  r  r  s    r   r  zMutableBox.make_indexery!      yy%%''r   c                6    | j                   j                         S r   )rB  r(  r  s    r   r(  zMutableBox.get_stride|!  r  r   c                6    | j                   j                         S r   r  r  s    r   rh  zMutableBox.get_name!  r  r   Nc                8    | j                   j                  |      S r   )rB  r  r  s     r   r  zMutableBox.has_large_inner_fn!  s    yy++I66r   c                8    | j                   j                  |      S r   r  r  s     r   r  zMutableBox.mark_reuse!  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.realize_hint!  r
  r   c                6    | j                   j                         S r   )rB  re  r  s    r   re  zMutableBox.unwrap_view!  r
  r   c                6    | j                   j                         S r   )rB  r  r  s    r   r  zMutableBox.is_input_buffer!      yy((**r   c                6    | j                   j                         S r   )rB  r	  r  s    r   r	  zMutableBox.freeze_layout!  s    yy&&((r   c                :    | j                   j                  ||      S r   )rB  r  r  s      r   r  z*MutableBox.freeze_layout_with_stride_order!  s     yy88NNr   c                8    | j                   j                  |      S r   )rB  r  r  s     r   r  z(MutableBox.freeze_layout_with_fill_order!  s    yy66u==r   c                8    | j                   j                  |      S r   )rB  r  r  s     r   r  z(MutableBox.freeze_layout_with_same_order!  s    yy66v>>r   c                :    | j                   j                  ||      S r   )rB  r  r  s      r   r  z+MutableBox.freeze_layout_with_exact_strides!  s     yy99-WWr   c                6    | j                   j                         S r   )rB  r  r  s    r   r  zMutableBox.get_read_writes!  r
  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_reads!  r  r   c                6    | j                   j                         S r   r  r  s    r   r   zMutableBox.num_reads!  r  r   c                6    | j                   j                         S r   r  r  s    r   r"  zMutableBox.get_storage_numel!  r  r   c                6    | j                   j                         S r   r   r  s    r   r'  zMutableBox.get_reduction_type!  r  r   c                6    | j                   j                         S r   r  r  s    r   r)  zMutableBox.get_reduction_size!  r  r   c                6    | j                   j                         S r   r  r  s    r   r+  zMutableBox.is_extern!  r  r   c                6    | j                   j                         S r   )rB  r-  r  s    r   r-  zMutableBox.is_no_op!  r  r   c                8    | j                   j                  |      S r   r$  r  s     r   r/  zMutableBox.constant_to_device!  s    yy++F33r   c                6    | j                   j                         S r   )rB  r1  r  s    r   r1  zMutableBox.get_mutation_names!  r  r   c                6    | j                   j                         S r   )rB  r3  r  s    r   r3  zMutableBox.get_operation_name!  r  r   c                6    | j                   j                         S r   )rB  r5  r  s    r   r5  z'MutableBox.get_inputs_that_alias_output!  s    yy5577r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.realize!  r  r   c                8    | j                   j                  |      S r   r  r$  s     r   r%  zMutableBox.get_free_symbol_uses!  s     yy--m<<r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_read_names!  r  r   c                6    | j                   j                         S r   )rB  r  r  s    r   r  zMutableBox.get_defining_op!  r
  r   c                8    | j                   j                  |      S r   )rB  r  r  s     r   r  zMutableBox.codegen_reference!  s    yy**622r   c                6    | j                   j                         S r   rB  r  r  s    r   rC  zMutableBox.layout!  s     yy((**r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_layout!  r  r   c                6    | j                   j                         S r   r
  r  s    r   r  zMutableBox.get_output_spec!  r
  r   c                6    | j                   j                         S r   rs  r  s    r   r   zMutableBox.get_size!  r  r   c                .    | j                   j                  S r   )rB  r   r  s    r   r   zMutableBox.dtype!  s    yyr   c                t   t        | j                  t              rQt        |       j                   dt        | j                        j                   d}d}| j                  j                  }n&t        |       j                   d}| j                  }d}|t        t        |            |g}dj                  |      S )Nr  z))r  r  )r   rB  rp  r   r   r  r   r  )r  line0endlr  r  s        r   r  zMutableBox.__str__!  s    dii,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyr   rC  rK  rO  rQ  rS  rU  r   rV  rX  r8  rY  rZ  r[  r]  r^  r_  r`  rb  rc  rd  rH  rD  rf  rg  re  r9  r<  rJ  rB  rA  r?  )/r   r   r   rh  r   r  r  r  r  r(  rh  r  r  r  re  r  r	  r  r  r  r  r  r  r   r"  r'  r)  r+  r-  r/  r1  r3  r5  r  rY   r%  r  r  r  rn  rC  r  r  r   r   r  r  r   r   r   rp  rp  h!  sb    L2&'(&$7+('+) ;@O"O37O	O
>? HMX/X@DX	X
+%%-..%$4..8# L)$)=!=	!= *=
*+3 + +&+$   " Hr   rp  c                  V    e Zd Zeedd              Zeedd              Zedd       Zy)r   c                     y r   r   rB  s    r   r  zTensorBox.create"  s    FIr   c                     y r   r   r
  s    r   r  zTensorBox.create"  s    +.r   c                N    t        | t              r| S t        t        |             S r   )r   r   r   rf  r
  s    r   r  zTensorBox.create"  s"    d12KD)**r   N)rB  r   r   r   )rB  r   r   r   )rB  r   )r   r   r   r   rk  r  r   r   r   r   r    "  s@    I  I.  .+ +r   c                  X    e Zd ZdZddZddZddZddZddZddZ	ddZ
dd	Zdd
Zy)rf  z7
    StorageBox allow in-place mutation of Tensors
    c                    t        | j                  t        t        f      r4| j                  j	                         t
        j                  j                  v S yr  )r   rB  rz  rH  rh  rn   r   r  r  s    r   r  zStorageBox.is_input_buffer"  s:    dii+!?@99%%'177+?+???r   c                    t        | j                  t              xr4 | j                  j                         t        j
                  j                  v S r   )r   rB  r  rh  rn   r   rW  r  s    r   r  zStorageBox.is_module_buffer"  s9    tyy>3 :		""$(9(99	
r   c           	        t         j                  | j                        r| j                  j                         S t	        | j                  t
        t        t        t        f      sJ t        | j                               | j                  j                         }| j                  j                         }| j                  j                         }|J t        d t        || j                  j                         | j                  j!                         d      | j                        | _        t"        j$                  j'                  | j                        | j                  _        t"        j$                  j+                  | j                         | j,                  | j                  _        || j                  _        || j                  _        | j                  j(                  S )NF)r   r   r   rG  r]  )r   r|  rB  rh  r   r  r9  rB  r{  r   r  r  r  rx  r  r  r   rn   r   r-  r   r.  rn  rr  rp  )r  rr  rp  r   s       r   r  zStorageBox.realize"  sP   ""499-99%%''$))iD$%GH 	
$IIK
 	
H ii//1II++-	%%'!!!"!ii))+YY'')	 	
	 00;			""499- LL		 +		'		yy~~r   c                    t        | j                  t        t        f      r9| j                  j	                         j
                  dkD  r| j                          yyy)zL
        Called on buffers we expect to be forced to realize later.
        rA   N)r   rB  r  r9  r  nontrivial_read_countr  r  s    r   r  zStorageBox.realize_hint<"  sF    
 tyy9i"89		**,BBQFLLN G :r   c                   ddl m} | j                         D cg c])  } ||      st        j                  j                  |      + }}|syt        |      }t        |      }t        |      }||k\  xr ||z  dk\  xr ||k(  S c c}w )Nr   )is_nonfreeable_buffersFr   )	r  r
  r  rn   r   get_dep_size_hintr  r  r	  )r  r  r
  r  size_of_reads
total_sizemax_sizemin_sizes           r   $has_accumulated_enough_reads_by_sizez/StorageBox.has_accumulated_enough_reads_by_sizeF"  s    @ ~~'
)#. GG%%c*
 

 '
}%}%)# %X%*%H$	

s   .Bc                
   t        | j                  t              xrh | j                         t        j
                  kD  xsE | j                         xs3 t        j                  d uxr | j                  t        j                        S r   )	r   rB  r  r   rB   realize_acc_reads_thresholdr   realize_acc_reads_size_thresholdr
  r  s    r   r  z!StorageBox.has_exceeded_max_readsY"  sq    $))Y/ 	
NNvAAA &&( 77tC ==;;		
r   c                F   |dkD  rt        | j                  t        t        f      r{t	        | j                        r3| j                  j                         ddg}t        fd|D              ry| j                         t        j                  kD  xs | j                         S y)zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        rA   expsigmoidc              3  :   K   | ]  }|j                   v   y wr   )used_ops)r   r   opcounts     r   r   z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>o"  s     @qG,,,@s   TF)r   rB  r  r9  r!  r  r  r   rB   realize_reads_thresholdr  )r  r  	heavy_opsr
  s      @r   should_realize_on_reusez"StorageBox.should_realize_on_reusee"  s    
 19DII	9/EFdii ))446"I.	@i@@ 6#A#AA -**, r   c                H    | j                  |      r| j                          y y r   )r
  r  r  s     r   r  zStorageBox.mark_reusew"  s    ''.LLN /r   c                6    | j                   j                         S r   r  r  s    r   r   zStorageBox.num_reads{"  r  r   NrC  rH  r8  )r  r   r   r   )r  r   r   r   rX  rc  )r   r   r   rh  r  r  r  r  r
  r  r
  r  r   r   r   r   rf  rf  "  s4    

:
&

$%r   rf  c                  0    e Zd ZU ded<   ded<   dZded<   y)Subgraphr   r   r3	  graph_moduleNzOptional[GraphLowering]r   )r   r   r   r   r   r   r   r   r
  r
  "  s    
I&&%)E")r   r
  c                    | D cg c]$  }t        |t              r|j                         n|& } }t        t	        d | D                    t        |       k  S c c}w )Nc              3  2   K   | ]  }t        |        y wr   )r	  )r   r  s     r   r   z'_has_aliased_buffers.<locals>.<genexpr>"  s     ;"V*;r|  )r   rH  re  r   r:   )buffersr  s     r   _has_aliased_buffersr
  "  s^      !+6? COG 
 z;7;;<s7|KKs   )Ac                       e Zd ZU dZdZded<   dZded<   dZded<   	 	 	 	 	 	 	 	 d fdZe		 	 	 	 	 	 dd	       Z
dd
Z xZS )InvokeSubgraphz.
    Ir node for the invoke_subgraph HOP.
    NOptional[Subgraph]r"	  Optional[Sequence[IRNode]]operandsrB  c                    t         |   d ||       || _        t        j                  j                  |       | _        t        j                  j                  |        y r  )r@  r  r"	  rn   r   r-  r   r.  )r  r"	  r
  rC  rB  s       r   r  zInvokeSubgraph.__init__"  sQ     	 	 	

 !GG++D1		""4(r   c                   ddl m} t        j                  j                  }d}|j
                  j                  d      x}rrd}|j                  t        j                  j                  j                  u r7|j                  d   t        j                  j                  j                  u sJ d}|d   |d }nd}|j                  t        j                  j                  j                  u r7|j                  d   t        j                  j                  j                  u sJ d}|j                  |d }|D 	cg c]  }	|	j
                  d	    }}	|D 	cg c]  }	| j                  |	       }}	g }
t        |      D ]H  \  }}t!        |t"        t$        f      r|
j'                  |       .|
j'                   ||||                J |
}|j                  |t        j                  j)                  |j*                  ||j,                  
      |_        t        j.                  |j                        5   |j                  j0                  |  ddd       |j                  j2                  }d}|D ]$  }t!        |t"              r|j5                         } n |J t7        ||t9        |            	 	 	 	 	 	 dfd}t        |      D cg c]  \  }} |||       }}}|_        |S c c}	w c c}	w # 1 sw Y   xY wc c}}w )zFor each operand, get a realized input, force it to have the same
        strides as the subgraph inputs, then use an InvokeSubgraphrA   )constrain_to_fake_tensorNeager_input_valsr   r   r   r   r  rY  r 	  r(	  r  )r"	  r
  rC  c                Z   t        | t        t        f      r| S | j                         }|J t	        t        || j                         | j                         | j                         | j                         j                  | j                         j                        t        |fgd      S )NrY  T)r
  )r   r   r  r  rp
  rE  r  r   r(  r  rF  rG  r   )rQ  indr   invoke_subgraphs      r   create_outputz,InvokeSubgraph.create.<locals>.create_output"  s     &#8:N"OP**,)))"%$..0#__.%002%00299"("3"3"5"?"? $C[M6: r   )rQ  r   r
  r   r   z?Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput])r  r
  rn   r   r  rW  r  r  r  rl   ru
  r_  r   r
  r  r   r   r   rU  r  r!	  r
  r   r&	  r'	  graph_outputsr  r
  r  rB  )r  r"	  r
  r
  r  fake_operandsr
  rF  fx_operandsr   new_operandsr   operandrB  r   r
  r   rQ  outsr
  s                      @r   r  zInvokeSubgraph.create"  s    	7 ww+++00445GHHHF""eii&<&<&I&II#((+uyy/E/E/U/UUUU,Q/8MF""eii&<&<&I&II#((+uyy/E/E/U/UUUU '++FG4K4?@qQVVE]@M@ AI!I1#"3"3A"6!I!I%'%h/ 	LC'$9>#JK##G,##,WmC6HI		  >>!WW22((,&mm 3 HN
 $$X^^4 3"""M23 ....  	Gg'<= ++-	 !!!($F3
		!$	L	. ;DG:LMYQfa(MM"&O A "J*3 3T Ns   /KK"%K'>K3'K0c                &    |j                  |        y r   )codegen_invoke_subgraphr/  s     r   r0  zInvokeSubgraph.codegen#  r	  r   )r"	  r
  r
  r=  rC  r  r   r   )r"	  r
  r
  r   r   zElist[Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput]]r  )r   r   r   rh  r"	  r   r
  rB  r  r  r  r0  r  r  s   @r   r
  r
  "  s     $(H '+/H(/*.G'.
) 
),<
)FW
)	
) dd,2d	Nd dL.r   r
  c                       e Zd ZU dZdZded<   dZded<   dZded<   dZded	<   dZ	d
ed<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ
edd       Ze	 	 	 	 	 	 	 	 	 	 dd       ZddZddZ xZS )Conditionala  
    IR node representing torch.cond

    Attributes:
        predicate: A boolean scalar tensor determining which branch to execute.
        operands: Input tensors passed to both true and false subgraphs.
        true_subgraph: Subgraph executed when predicate is True.
        false_subgraph: Subgraph executed when predicate is False.
        outputs: MultiOutput nodes representing the conditional's outputs.
    Nr  	predicater
  r
  r
  true_subgraphfalse_subgraphOptional[Sequence[MultiOutput]]rB  c                   || _         || _        || _        || _        t	        |g|      \  }}t
        	|   d |||       ||| _        t        j                  j                  |       | _        t        j                  j                  |        y N)r   rC  r_  r  )r  r
  r  r  _split_by_sym_typer@  r  r  rn   r   r-  r   r.  )
r  r  r
  r  r  rC  r  sym_argsrf  rB  s
            r   r  zConditional.__init__##  s     # *, 2I3I3I J+"	 	 	
 (%6D"GG++D1		""4(r   c                R    t        | t              r| S | j                  j                  S r   )r   r   r   r,  )r   s    r   _maybe_exprzConditional._maybe_expr?#  s    aHvv{{r   c                	   | j                  |      }|D cg c]  }| j                  |       }}t        j                  j                  j                  d   }t        |t              sJ t        |             t        d |D              sJ |D cg c]  }t        t        |      j                  d   ! }}t        j                  j                  j                  d   }	 	 	 	 	 	 dd}	||fD ]  }
|
j                  t        j                  j                  |
j                  ||
j                        |
_        t        j                  |
j                        5   |
j                  j                   |   |	|
j                  j"                  |      |
j                  _        ddd        |j                  J |j                  J |j                  j"                  }|j                  j"                  }d|fd|ffD ]!  \  }}t%        |      st'        d	| d
|        t)        |      t)        |      k(  s	J ||f       t+        t-        ||            D ]  \  }\  }}|j/                         |j/                         k(  s
J |||f       |j1                         |j1                         k(  s
J |||f       |j3                         j4                  |j3                         j4                  k(  rJ |||f        t7        d ||gz   D              }t9        t        j                  j:                  j<                  t        j                  j                  j                  j?                  dd            }|J d       tA        ||||tC        |      |      }t+        t-        |t        j                  j                  j                  d               D cg c]  \  }\  }}tE        tG        |j/                         |j/                         n||j1                         |jI                         D cg c]  }t@        jK                  |       c}|jM                         D cg c]  }t@        jK                  |       c}|j3                         j4                  |j3                         jN                        |tP        |fg       }}}}}||_)        |S c c}w c c}w # 1 sw Y   xY wc c}w c c}w c c}}}}w )zNCreate a Sequence of IRNodes from a conditional statement (see .lowering.cond)r&  c              3  <   K   | ]  }t        |t                y wr   )r   r8   r  s     r   r   z%Conditional.create.<locals>.<genexpr>U#  s     <1:a&<r  r  c                    g }t        | |      D ]e  \  }}t        |t              r|j                  |       (|j                  t        j                  t        |      |j                         d             g |S NFr  )r   r   r   r  r  r  r   r   )r
  fake_tensorsretrQ  r4  s        r   _require_exact_stridesz2Conditional.create.<locals>._require_exact_stridesY#  sr     C #M< @ 	f&;<JJv&JJ$::%f-t{{}E ; 		 Jr   Nr
  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  \   K   | ]$  }t        |t              s|j                          & y wr   )r   r   r  )r   os     r   r   z%Conditional.create.<locals>.<genexpr>#  s)      
a!67 LLN
r   r  zcannot determine devicer  )r  r
  r  r  rC  r  rY  )r
  r=  r  zSequence[torch.Tensor]r   r   )*r  rn   r   r  r   r   r   r   r   r   r8   rW  r!	  r
  r   r&	  r'	  r
  r
  r	  r   r   r   r  r  r  rF  r  r5   r   r   r  r  r  rp
  rE  r   r  r   rG  r   rB  )r  r  r  r  r
  r   r
  r
  fake_outputsr  r"	  true_outputsfalse_outputsr   rB  r   t_of_or   r  conditionalrQ  merged_outputr  s                           r   r  zConditional.createE#  s    %%i0	2:;QC%%a(;; ! 4 4 9 9" =+x0C${2CC0<<<<<<GHqdA++E2HHww++007	+	0	 	$ !(+ 	H~~%!"!6!6,,#0"*-- "7 "
 ((8 &HNN&&6 4J 44l4HNN0 	" }}(((~~)))}}22 44(,7*m9TU 	MD'#L1$**./TU\T]_ 	 < C$66U}8UU6&s<'GH 	UMAzS>>#s~~'77F!S#F7==?cmmo5D3}D5>>#**cnn.>.E.EET3PS}TE	U  
+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
: /8L!''"6"6";";E"BC/)
 
( +*FM'  ((*6 ",,. **,@M@R@R@TU"+11"5U>K>R>R>T8://3 ",,.55$//1;; 
 
2 &_ <
 I: v V
s>   S $SAS
AS!
S9S!
S)A	S!

S	
S!
c           	         |j                  |        |j                  | j                         | j                  t	        | di              y r
  )codegen_conditionalr
  rh  rB  r   r/  s     r   r0  zConditional.codegen#  s9    ##D)88MMOT\\749Lb+Q	
r   c                    t        | dd       x}rKt        t        j                  j                  j
                  |      }|J t        |j                               S t               S r
  r"
  r#
  s      r   r  z$Conditional.get_unbacked_symbol_defs#  r%
  r   )r  r   r
  r=  r  r
  r  r
  rC  r  r  rz
  r   r   )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.Expr])
r  r   r  r
  r  r
  r
  zlist[TensorBox]r   zlist[MultiOutput]r  r  )r   r   r   rh  r  r   r
  r  r  rB  r  rk  r  r  r  r0  r  r  r  s   @r   r  r  #  s    	 #'I&+/H(/(,M%,)-N&-/3G,3)) #)  	)
 !) ") H) 
)8  
 zz z 	z
 "z 
z zx
 r   r  c                    g }g }| D ]?  }t        |t              r|j                  |j                         /|j                  |       A ||fS r   )r   r   r  r,  )r   non_sym_argsr	  r  s       r   r  r  #  sS     LH %c01OOCHH%$	% \!!r   c                       e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	d	ed
<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ
edd       Zedd       Ze	 	 	 	 	 	 	 	 	 	 	 	 dd       ZddZddZ xZS )	WhileLoopzSThe IR node for while_loop and while_loop_stack_output. It supports input mutation.Nr
  carried_inputsadditional_inputsr
  cond_subgraphbody_subgraphr  rB  c                .   || _         || _        || _        || _        t	        g ||      \  }}	t
        
|   d ||	|       ||| _        || _        t        j                  j                  |       | _        t        j                  j                  |        y r  )r%  r&  r'  r(  r  r@  r  r  stack_outputrn   r   r-  r   r.  )r  r%  r&  r'  r(  rC  r  r*  r	  rf  rB  s             r   r  zWhileLoop.__init__#  s     -!2** 21n101!
+ 	"	 	 	
 (%6D"(GG++D1		""4(r   c                   t        |       s| S | D cg c]$  }t        |t              r|j                         n|& }}t	               }g }t        | |      D ]b  \  }}t        |      |v r%|j                  t        j                  |             8|j                  t        |             |j                  |       d |S c c}w r   )r
  r   rH  re  r:   r   r	  r  r  rG  r  )r%  r  unwrapped_buffersseen_buffersr  original_inputunwrapped_buffers          r   _clone_aliased_inputszWhileLoop._clone_aliased_inputs$  s    #N3!! )
 %/v$GF VS
 
 )31303NDU0V 	.,N,"#|3l55nEF  $4!56n-	. !
s   )B<c                    t        | t              r| S t        | t        t        f      rt        |       S t        | t              rt        j                  |       S t        dt        |              )NzNYI unsupported output type: )r   r   rf  rH  rp
  r  rH
  r   )r   s    r   _maybe_wrap_as_tensor_boxz#WhileLoop._maybe_wrap_as_tensor_box'$  s\    c9%Jj/:;S>![)##C((!>tCykJKKr   c                "   ddl m} 	 	 	 	 	 	 dd}t        j                  j                  j
                  d   }t        j                  j                  j
                  d   }	||	z   }
|
D cg c]  }|j                  d    }}|D cg c]  }|j                  d    }}|	D cg c]  }|j                  d    }}|D cg c]  }| j                  |       }}t        j                  |      } |||      }|D cg c]  }| j                  |       }} |||      }||z   }||fD ]	  }|j                  t        |
t              sJ t        |
             t        j                  j                  |j                  |
|j                        |_        t        j                   |j                        5   |j                  j"                  |  ||u rYt%        |j                  j&                        t%        |      k(  sJ  ||j                  j&                  |      |j                  _        ddd        |j                  r|j                  sJ |j                  j&                  }|j                  j&                  }t)        |      rt+        d	|       t%        |      d
k(  sJ |       |d   }t        |t,              sK|j/                         t0        j2                  k(  sJ |       t%        |j5                               dk(  sJ |       t%        |      dkD  sJ d       |d   j7                         }|J t%        |      t%        |      k(  s	J ||f       t9        t;        ||            D ]  \  }\  }}	 	 	 	 	 	 dd} ||j5                         |j5                                 ||j=                         |j=                                |j7                         |j7                         k(  sJ ||||f       |j/                         |j/                         k(  rJ |||f        |J t?        t        j                  j@                  jB                  t        j                  j                  j                  jE                  dd            }t        ||||tG        |      ||      }|j                  8t        |j                  jH                  t0        jJ                  jL                        sJ  ||j                  jH                  |      d   }tO        |      }|D cg c]  }||   	 } }tQ        |       }!g }"g |_)        g |_*        |rt%        |      dk(  sJ d       t9        t        j                  j                  j                  d         D ]  \  }}#tW        tY        |#jZ                  |#j\                  |#j_                         D $cg c]  }$t`        jc                  |$       c}$|#je                         D %cg c]  }%t`        jc                  |%       c}%      |tf        |fg      }&|jR                  ji                  |&       |"ji                  |&        nt9        |      D ]
  \  }}#||v rb|t%        |      k  sJ d       tk        |!      }'|jT                  ji                  tm        |'jn                  |'|             |"ji                  |'       mtW        tY        |#j7                         |#j/                         |#j5                         |#j=                         |#jq                         jr                        |tf        |fg      }&|jR                  ji                  |&       |"ji                  |&        t;        ||"      D ]g  \  }(})|(ju                         t        j                  jv                  v s1t        j                  jx                  j{                  |)ju                                i |"S c c}w c c}w c c}w c c}w c c}w # 1 sw Y   xY wc c}w c c}$w c c}%w )zcreate the while_loop IR node. stack_output controls whether it stack
        each iterations' output, which is necessary for training.
        r   )check_input_alias_and_mutationc                P   t        |       t        |      k(  sJ g }t        | |      D ]{  \  }}t        |t        j                        rKt
        j                  |      }|j                  t        j                  ||j                         d             k|j                  |       } |S r  )r   r   r   r  r`  r$  r2  r  r  r  r   )tensor_boxesr  r  r  fknew_tbs         r   r  z0WhileLoop.create.<locals>._require_exact_strides@$  s     |$L(9999ClL9 #Bb%,,/ '@@DFJJ$::"BIIKu ;  JJrN)#* Jr   r&  r  Nr
  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: rA   z9torch.while_loop is assumed to have at least one operand.c                    t        |       t        |      k(  sJ t        | |      D ]/  \  }}t        j                  j                  j                  ||       1 y r   )r   r   rn   r   r   r4  )	lhs_exprs	rhs_exprslhsrhss       r   _guard_list_equalsz,WhileLoop.create.<locals>._guard_list_equals$  sN     9~Y777 #Iy 9 <HCGG$$11#s;<r   r  r  )r%  r&  r'  r(  rC  r  r*  r   z-NYI: while_loop_stack_output input mutations.)r   r   r   r   zonly carries can be mutated.)r   r   r   r   rF  )r6  r=  r  z,list[Union[int, torch.SymInt, torch.Tensor]]r   r   )r;   Sequence[Union[int, sympy.Expr]]r<  r@  r   r   )>torch._higher_order_ops.utilsr4  rn   r   r  r   rW  r  r$  r0  r   r   r   r!	  r
  r   r&	  r'	  r   r
  r
  r	  r   r  r  r   r   r  r   r   r(  r5   r   r   r  r  modulefxGraphModuler:   r  rB  r  rp
  rE  r   r   r   r  r  r   r   r  r  rC  rC  r  rF  rh  r  r	  r  )*r  cond_fnbody_fnr%  r&  r*  r4  r  fx_carried_inputsfx_additional_inputsfx_all_inputsr   fake_all_inputsfake_carried_inputsfake_additional_inputscarried_inputs_additional_inputs_
all_inputsr"	  cond_outputsbody_outputsr  r   r   rS  bor?  r  
while_loopmutated_idxsmutated_idx_setr   rA  mutated_inputs_iterall_outputsrQ  r  r  	multi_outmutated_inputra  r   s*                                             r   r  zWhileLoop.create2$  s    	Q	*	F	 	: GG0055b9 ww3388<),@@2?@Q166%=@@6GHqvve}HH9M!NA!&&-!N!N9GHA3,,Q/HH#99/J0BUV<MNqc//2NN3 6
 %'99
 '* 	H~~%!-:OD<OO:!"!6!6,,#0"*-- "7 "
 ((8 &HNN&&8  7*"8>>#?#?@C/E      8N$NN88/84 	4 }}..}}22}}22- XXdWeg  < A%3|3%O!23;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!?#s<'88 	
;
 	
8 %S,%GH 	AKAxB<;<;< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3	A" !!!5GG&&GG  %%))*=tD

 *0!!$F3/%	

 }}(ZMM  %(("6"6.
 	
 

 6MM  /

 %\25DEc*S/EE #>2$&
&(
#'1, ?,  ))=)=)B)B5)IJ .V'%}}$llDJKKMRbk55b9RFLmmoV 7 7 ;V	 C[M		 "")))4""9-.  )6 2V/)^!44T6TT4$()<$=M//66&}';';]JW  &&}5 +##)#4#4#6"("2"2"4!'!2#)#4#4#6#)#4#4#6#=#= #
!I &&--i8&&y1-20 NK8 	@HC||~!5!55 ++//?	@ U AH!NH O t F" SVs=   (__! _&<_+8_0/A7_5`:`*`5_?	c           	         |j                  | | j                         |j                  | j                         | j                  t        | di              y r
  )codegen_while_loopr*  r
  rh  rB  r   r/  s     r   r0  zWhileLoop.codegen%  sA    ""4):):;88MMOT\\749Lb+Q	
r   c                    t        | dd       x}rKt        t        j                  j                  j
                  |      }|J t        |j                               S t               S r
  r"
  r#
  s      r   r  z"WhileLoop.get_unbacked_symbol_defs%  r%
  r   )r%  r=  r&  r=  r'  r
  r(  r
  rC  r  r  rz
  r*  r   r   r   )r%  r=  r   r=  )r   r   r   r   )rE  r
  rF  r
  r%  r=  r&  r=  r*  r   r   rK
  r  r  )r   r   r   rh  r%  r   r&  r'  r(  rB  r  rk  r0  r2  r  r  r0  r  r  r  s   @r   r$  r$  #  s   ]15N.54818(,M%,(,M%,/3G,3)() ,)  	)
  ) ") H) ) 
)D  2 L L WW W )	W
 ,W W 
)W Wr
 r   r$  c                  \     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd fdZd	dZ xZS )
r   Nr	  c          	         t         
|   |||||d |       ddlm}  ||      }	|	J |	| _        t
        j                  j                  j                  |	d       | _	        | t
        j                  j                  |	<   y )Nrl
  r   )_get_effect)
r@  r  torch._higher_order_ops.effectsr_  effect_typern   r   effectful_opsr  prev_effect_buffer)r  rC  r  rf  r
  rS  r   r  r_  ra  rB  s             r   r  zEffectfulKernel.__init__%  s     	/ 	 	
 	@!&)&&&&"#''"7"7";";K"N-1k*r   c                    t         |          }| j                  F|j                  j	                  t        j                  | j                  j                                      |S r   )r@  r  rc  r  r  rC   r  rh  )r  rj  rB  s     r   r  zEffectfulKernel.get_read_writes;%  sU    g-/"".!!$$T%<%<%E%E%GH r   c                     yr  r   r  s    r   r	  z EffectfulKernel.has_side_effectsE%  r  r   r   ry
  r`  rC  )r   r   r   r  r  r	  r  r  s   @r   r   r   %  sr     ,02 KO22 2 &	2
 &2 +2 )2 H2 
2:r   r   c                  4    e Zd Z ed       	 d	 	 	 dd       Zy)r|  c                    t               S r   r9   r$  s     r   r%  z!NonTensorObj.get_free_symbol_usesJ%  r  r   NrZ  re  )r   r   r   rY   r%  r   r   r   r|  r|  I%  s,    N+$)!	! ,r   r|  c                  L    e Zd ZU ded<   ded<   ddZdddZddZdd	Zdd
Zy)r  r   r   +Union[FakeScriptObject, torch.ScriptObject]r  c                    | j                   S r   r  r  s    r   rh  zTorchBindObject.get_nameV%  rZ  r   Nc                    | j                   S r   r  r  s     r   r  z!TorchBindObject.codegen_referenceY%  rZ  r   c                    | j                   S r   r  r  s    r   rY  zTorchBindObject.get_value\%  r  r   c                    t        | j                  t        j                        r| j                  S | j                  j                  S r   )r   r  r  ScriptObjectreal_objr  s    r   get_real_objzTorchBindObject.get_real_obj_%  s0    djj%"4"45::::&&&r   c                   | j                         }t        |      ryt        |d      sJ t        |j	                               }t        j                  |      d   }|D cg c]=  }t        |t        j                        r!|j                         |j                         z  ? }}t        j                  t        j                  |d      S c c}w )Nr   __obj_flatten__)rp  r)   r  r   rr  rJ  rT  r   r  r`  r	  numelru  r  operatorr  )r  real_script_obj	flat_dict
flat_elemsr   
flat_sizess         r   get_buf_byteszTorchBindObject.get_buf_bytese%  s    ++-/*(9:::88:;	((3A6
  
!U\\* NNqwwy(

 

 j!<<
s    AC	rU  r   rJ  )r   ri  )r   ztorch.ScriptObjectrc  )	r   r   r   r   rh  r  rY  rp  ry  r   r   r   r  r  Q%  s&    
I66'=r   r  c                  4    e Zd ZU ded<   ded<   ddZd	d
dZy)rU  r   r   rN  r   c                    | j                   S r   r  r  s    r   rh  zGeneratorState.get_name|%  rZ  r   Nc                    | j                   S r   r  r  s     r   r  z GeneratorState.codegen_reference%  rZ  r   rU  r   rJ  )r   r   r   r   rh  r  r   r   r   rU  rU  w%  s    
Ir   rU  c                  r    e Zd ZddZddZdd	dZe	 	 	 	 	 	 	 	 	 	 d
d       Ze	 	 	 	 	 	 	 	 	 	 dd       Zy)_CollectiveKernelc                     yr  r   r  s    r   r  z!_CollectiveKernel.should_allocate%  r  r   c                     yr  r   r  s    r   r	  z"_CollectiveKernel.has_side_effects%  r  r   Nc                \   t        | j                        t        j                  j                  u sJ d       | j                  }||| _        n|j                  j                  | _        |j                  j                  D cg c]  }|j                  s|j                   c}| _
        y c c}w )Nz,Setting cpp kernel needs a valid op_overload)r   r  r  r  r  r  r  r   r  r  r
  )r  r  r  r   s       r   r  z%_CollectiveKernel.set_cpp_kernel_name%  s    D$$%)>)>> 	
:	
> !!&#2D #)>>#6#6D  #NN44.
AFF.
* .
s    B)B)c           
        t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]?  }
|
j	                          t         j                  j                  |
j                                A |d   j                         } | t        |      ||      }t        j                  |      }|j                  j                  |D cg c]  }t        t        |      ||       c}       |j                  j                  |D cg c]  }|j                          c}       d|v r`|j                  j                  t        t        |      |d   |             |j                  j                  |d   j                                y y # 1 sw Y   txY wc c}w c c}w )Nr  r   r  r   )rn   r   r  rq  r  ri  rh  r  r\  rJ  tree_leavesr  r  rC  r
  r  )r  r  r_  r   r   _example_outputrf  rg  rS  r  
tensor_argr   rs
  inpsrm  ra  s                   r   create_inplacez _CollectiveKernel.create_inplace%  s    WW 	D #""66CDCFC!	D %E2C1D&EE$% 	?J GG''
(;(;(=>	? Q**,f%
 !!&)&&OST^Jf5sFCT	

 	!!T"Bc3<<>"BCF?##**z8&-P %%fUm&<&<&>? ;	D 	D0 U #Cs   F+*F8(F=+F5c           
        t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]#  }
t	        |
t
              r|
j                          % t	        t              r| j                  ||      }|J  | t        |      ||      }t        |      D cg c](  \  }}t        | j                  |      |t        |fg      * c}}|_        t        |j                  |      D ]T  \  }}t        j                   st#        |      r"t         j                  j$                  j'                  |j(                         V |j                  S  | | j                  |      ||      }t        j                   st#        |      s3t         j                  j$                  j'                  |j(                         |g|_        |S # 1 sw Y   xY wc c}}w )Nr  r  )rn   r   r  rq  r   r  r  r   r0
  r  r   rp
  ri
  rB  r   rB   rq
  rk   r  r  r   )r  r  r_  r   r   rl  rf  rg  rS  r  r  r   rs
  r   rI  rm  s                   r   create_out_of_placez%_CollectiveKernel.create_out_of_place%  s    WW 	D #""66CDCFC!	D %F3D2E&FF$% 	%Jj/:""$	% nd+__[.AF%%%!0F "+>!: Av ((0AYKFN  #6>>>B <V::BSC GG--11#((;	<
 >>!$$^4F 66>O? ))--fkk:$XFNMe	D 	D.s   G#-G0#G-rC  r   r  )
r  r|   r_  zUnion[IRNode, list[IRNode]]r   r   r   r   r   r   )
r  r|   r_  z!Union[TensorBox, list[TensorBox]]r   r   r   r   r   z+Union[list[MultiOutput], _CollectiveKernel])	r   r   r   r  r	  r  r  r  r  r   r   r   r~  r~  %  s    

( )@)@ ,)@ 	)@
 )@ 
)@ )@B 99 29 	9
 9 
59 9r   r~  c                  P     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )_AllReduce_KernelNr	  c          	     T    t         |   |||||d |       | j                  d       y )Nrl
  +aoti_torch_cpu__c10d_functional_all_reduce_r@  r  r  r
  s	           r   r  z_AllReduce_Kernel.__init__ &  =     	/ 	 	
 	  !NOr   c                    |j                  d       |j                  |        t        | j                  t              r| j                  |       y y Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hinclude_extra_headerr  r   rC  r  r  r/  s     r   r0  z_AllReduce_Kernel.codegen6&  @    $$%RS,,T2dkk6*%%g. +r   r   ry
  r  r5	  r  s   @r   r  r  &  sw     ,0P KOPP P &	P
 &P +P )P HP 
P,/r   r  c                  P     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )_AllReduceKernelNr	  c          	     T    t         |   |||||d |       | j                  d       y )Nrl
  *aoti_torch_cpu__c10d_functional_all_reducer  r
  s	           r   r  z_AllReduceKernel.__init__?&  s=     	/ 	 	
 	  !MNr   c                    |j                  d       |j                  |        t        | j                  t              r| j                  |       y y r  r  r/  s     r   r0  z_AllReduceKernel.codegenU&  r  r   r   ry
  r  r5	  r  s   @r   r  r  >&  sw     ,0O KOOO O &	O
 &O +O )O HO 
O,/r   r  c                  v     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZd
dZddZedd       Zd fdZ xZ	S )_WaitKernelNr	  c          	     T    t         |   |||||d |       | j                  d       y )Nrl
  +aoti_torch_cpu__c10d_functional_wait_tensorr  r
  s	           r   r  z_WaitKernel.__init__^&  r  r   c                    |j                  d       |j                  |        t        | j                  t              r| j                  |       y y r  r  r/  s     r   r0  z_WaitKernel.codegent&  r  r   c                r   | j                   d   }t        |t              sJ t        |t              r2|j                   d   }t        |t              sJ t	        |             |gS t        |t
              rC|j                   d   }t        |t              r"|j                  d   \  }}|j                   |   gS g S g S r  )r_  r   r   r~  r   rp
  rk  )r  ra  r   collr   r   s         r   get_volatile_readsz_WaitKernel.get_volatile_reads{&  s    kk!n#v&&&c,-

1Aa(1$q'1(3J[) ::a=D$ 12Q3C())I Ir   c                r   t         j                  j                  5  | j                  ||      \  }}}}}d d d        rJ | d|         | t	        |j                               |      }|j                  j                  t        t	        |j                               ||             y # 1 sw Y   zxY w)Nr  r  )	rn   r   r  rq  r\  r  r  r  rC  )	r  r  ra  r  rf  rg  rS  r  rs
  s	            r   create_waitz_WaitKernel.create_wait&  s    WW 	0 ""63/!	0 %E2C1D&EE$cnn./
 	&&:S^^-=>VL	
!	0 	0s   B--B6c                    t         |          }| j                         }|D ]>  }|j                  j	                  t        j                  |j                                      @ |S r   )r@  r  r  r  r  rC   r  rh  )r  rj  volatile_readsvrrB  s       r   r  z_WaitKernel.get_read_writes&  sZ    g-/002  	GB!!,"6"6r{{}"EF	Gr   r   ry
  r  r  )r  r|   ra  r   r   r   r`  )
r   r   r   r  r0  r  r  r  r  r  r  s   @r   r  r  ]&  s     ,0P KOPP P &	P
 &P +P )P HP 
P,/2 
 
* r   r  c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S r   )r   r7   r    r2   r   r   r:   r   r"   r  r  r`  r   r  r  s      r   r  r  &  s    !h%&$Q''	At}	%u||$& 	0A,Q//A	0	Au||	$$Q''|r   c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S r   )r   r7   r    r1   r   r   r:   r   r"   r  r  r`  r  s      r   r  r  &  s~    !h%&A	At}	%u||$& 	'A#A&&A	'	Au||	$A|r   c                   t        | t              rt        | j                  t              rt        | j                  j                  t              r'| j                  j                  j                  d|       y t        | j                  j                  t              rU| j                  j                  j                  d|       t        | j                  j                  t              r_t        | j                  j                  j                  t              r1| j                  j                  j                  j                  d|       y t        | j                  j                  t              r| j                  j                  j                  sft        | j                  j                  j                  d   t              r4| j                  j                  j                  d   j                  d|       y y y y y y y )Nrr  r   )r   r   rB  rf  r  r  rg  rx  rp
  rk  r_  )r  r  s     r   assign_origin_noder  &  sV    &)$FKK)Lfkk&&.KK//qA((&1KK//qA&++**N;
  %%uA   %%88J 6;;++[9((00fkk..55a8&AKK$$++A.AA-QRS B 1 : 2 *M$r   )r   r   r   zTypeIs[Union[int, Integer]])r   r   r   r   )r   r   r   r  )r   r\  r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   rN  r   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   r\  )r   Sequence[Union[int, Integer]]r   r\  r=  )r   r   r   r   r   r   )r   r   r   r   r   r  )r   r  r   r   r   zOptional[torch.Tensor])r  zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])r   z2Union[IRNode, OutputSpec, torch.device, None, str]r   rI  )r   z&Union[IRNode, torch.device, None, str]r   r   )r   zUnion[Buffer, TensorBox]r%  r   r   r   )r5  rT  r6  rT  r7  rT  r   r   )rI  r   rJ  z"Sequence[Union[int, torch.SymInt]]r   r   )rY  r3	  r   r   )r_  r=  r   r	  )r   zUnion[Expr, Sequence[Expr]]r   r@  r   rm   )r&  r   r   r@  r%  r   r   r  )r   r\  r   r  rF  r    r   rR  r  )TFNFN)r   r   r  r   r  r   r  rC  r  r   r  rC  r   ztuple[StorageBox, Layout])r   r   r  r  r   r   r7  )r   rT  r7  rT  r   r   )r   r@  r   r   )r   r  r   zTypeIs[Sequence[IRNode]])r
  r=  r   r   )r   r4	  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   r  )r  r   r  ztorch.fx.Noder   r   (O  
__future__r   rl  r1  ri  ru  r  loggingrt  textwraprp  collections.abcr   r   r   r   r   r	   r
   enumr   r   typingr   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittest.mockr   r   r    r!   r"   torch._export.serde.schema_exportserder
  rB
  r6
  r
  rK  r
  torch._loggingr  torch.fxtorch.utils._pytree_pytreerJ  torch._dynamo.utilsr#   torch._export.serde.serializer$   *torch._higher_order_ops.auto_functionalizer%   torch._inductorr&   r  r(   torch._library.opaque_objectr)   torch._prims_commonr*   r+   r,   r-   r.   %torch.fx.experimental.symbolic_shapesr/   r0   r1   r2   r3   r4   r5   r6   r7   torch.fx.noder8   torch.utils._ordered_setr:   torch.utils._python_dispatchr;   torch.utils._sympy.functionsr<   r=   r>   r?   torch.utils._sympy.symbolr@   r  rB   rC   codegen.commonrD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   	loop_bodyrN   ops_handlerrO   rP   rQ   rR   runtime.benchmarkingrS   runtime.hintsrT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   virtualizedrl   rm   rn   "torch._library.fake_class_registryro   rp   rq   codegen.cutlass.templaterr   codegen.wrapperrs   r   rt   ru   r   r   r  __version__rr  rq  ImportErrorrv   rw   rx   ry   r   rz   r  r{   r  r  r<  r|   	getLoggerr   ry  r  r2  r   r   r   r   r,
  r   r   r   r   r   r   r!  r#  r   r   r   r   r  r  r  r!  r0  r;  rO  r[  rb  ri  r   rp  r  r  r  r  r  r7  r9  r  r  r  r   r(  rB  rs  r{  r  r  rD  r  r  rd  r   r  r   r  r  rH  rl  rx  r  r  r  r  r  r  r  rE  r  rE  rQ  rU  r\  rf  rg  r  rz  r  r  r  r   rx  r{  r?  r   r   PrimitiveInfoTyperR  rl  rp  r  r  r  r  r  ry  r  r  r  r  r  r  rC  r  r	  r	  r	  r7	  rl	  r}	  r	  r	  r	  r	  r	  r	  r	  r	  r   r   r	  r	  r|
  r
  r  rp
  rp  r   rf  r
  r
  r
  r  r  r$  r   r|  r  rU  r~  r  r  r  r  r  r  r   r   r   <module>r     s   "          M M :      U T   ' ' 2 2 , ,   $ $ ( ? M # 2 7 
 
 
  / ? Q Q * "     N N - :     0 * ) CB&95$% "(OY'''NJ t_T]T]T]CI&) &C,-) -

 5 5uzz7U7U UVi Vg!			8??4	8yy~~'T  k	sDk!12K8STU	i 	) d#  $$$D44 , ! $  TX	1>P	 TX
	1
>P

 
 E 
 E 
 O 
 O .2&*:!%
>9
>
>;('7*    
	*$G$G/$G $GN';|, |,~	 UH H HV |
F |
 |
~& %
 %
 %
P 
i 
 
F |$y!y!uu=)< 8  JN<N<N +<NBF<N<N~ b
 b
 b
N '+1:
#  &	& "8D>8D>"BH"LMY M7S9 7St#1 #L[
+ [
| F
5 F
 F
T 	 	 	 V5 V Vr	 !<@=A999 9 :	9
 9 ;9 9x:	$ ^
v ^
 ^
B Y Y Yx -( - -` A9( A9 A9H !( ! !H w; w wt Sh S Sl & & &R_A _AD 6  " K| K K$ S| S S'9	(<7 7  `
Z `
 `
F	C& C[HV [H|!Gf !GHT %{ %D   .V* V*r UEV] E EP U&fi & & & 
K 
[ 
& 6  ( F    Ut4_ t4 t4nH
_ H
VD> DN #udCeCeT<Q6R1SST C CL"| "
\B. \B~5N 50( (6N >Z%N Z%z5455 UR? R Rj E9 EP UR< R Rj U"l " "J
/ 
((" ("V
V 
B=L =@%
 %
P
- 
$1
\ 1
hLl L^))| ))Z/, /d< 8B5 B$8- 826)l 6)r))| ))X9T 9Tx,E ,E^21| 21j-L -8<;< <;~ U  
p& pf U
. 
 
@   2 
  '
, '
X T T Tn+
 +m% m%` U*v * *L U}.\ }. }.@ U ,    D"
"2" Ux  x  x v	)n )X6  "=l "= "=J \  Y Yx/) />/( />S# Sr  T]r  NJs   j 	jj