
    	iH                        d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZ d dlmc mZ d dlmc mZ d dlmZmZ d dlmZ d dlm Z m!Z! g d	Z"d
e#de$e#e#f   fdZ%dee&   dejN                  de(e#ef   fdZ)dejN                  de(e#ef   dej$                  jT                  fdZ+d)dej$                  jT                  dej$                  jT                  fdZ,dejT                  dejT                  fdZ-dejT                  de.ejN                     de.ejN                     de.ejN                     fdZ/ej`                  ejb                  ejd                  ejf                  ejh                  ejj                  ejl                  ejn                  ejp                  ejr                  ejn                  ejt                  ejv                  gZ<ejz                  ej|                  gZ?ej`                  ej                  ejb                  ej                  ejd                  d iZBde.ejN                     de(e#ejT                  f   fdZCde.ejN                     de(e#ejT                  f   de(ejT                  ejT                  f   fdZD G d d       ZEd*d!ZFd"eEdeGfd#ZH G d$ d%      ZIdej                  fdej$                  jT                  d&ee(e#ef      d'e&ej                     dej$                  jT                  fd(ZKy)+    N)defaultdict)Iterable)Enum)AnycastOptional)ArgumentTarget)	ShapeProp)fuse_conv_bn_evalfuse_linear_bn_eval)matches_module_patternreplace_node_modulefuseremove_dropoutextract_subgraphmodules_to_mkldnnreset_modulesMklSubgraphgen_mkl_autotuneruse_mkl_length	UnionFindoptimize_for_inferencetargetreturnc                 F    | j                  dd      ^ }}|r|d   |fS d|fS )zp
    Splits a qualname into parent path and last atom.
    For example, `foo.bar.baz` -> (`foo.bar`, `baz`)
    .   r    )rsplit)r   parentnames      s/var/www/vps2.regionflexible.com/Desarrollo/venv/lib/python3.12/site-packages/torch/fx/experimental/optimization.py_parent_namer$   %   s3    
 MM#q)MVT6!9,,B,,    patternnodemodulesc                 j   t        |j                        dk(  ry|j                  d   |f}t        | |      D ]z  \  }}t        |t        j
                        s y|j                  dk7  r yt        |j                  t              s y|j                  |vr yt        ||j                           |usz y y)Nr   Fcall_moduleT)
lenargszip
isinstancefxNodeopr   strtype)r&   r'   r(   nodesexpected_typecurrent_nodes         r#   r   r   /   s     499~"&))A,!5E'*7E': 
#|,0??m+,--s3g-++,-]B
 r%   
new_modulec                     t        | j                  t              s!t        dt	        | j                               t        | j                        \  }}||| j                  <   t        ||   ||       y )NExpected str target, got )r.   r   r2   AssertionErrorr3   r$   setattr)r'   r(   r7   parent_namer"   s        r#   r   r   C   sa     dkk3'8dkk9J8KLMM$T[[1K%GDKKGK $
3r%   modelc                    t         j                  t         j                  ft         j                  t         j                  ft         j
                  t         j                  ft         j                  t         j                  fg}|st        j                  |       } |r$t        | t        j                  j                        st        j                  |       }n| }t        |j!                               }t        j                  |j"                        }|D ]  }|j$                  D ]  }t'        |||      st)        |j*                  d   j,                        dkD  r8||j*                  d   j.                     }	||j.                     }
|
j0                  sp|d   t         j                  t         j                  t         j
                  fv rt3        |	|
      }nt5        |	|
      }t7        |j*                  d   ||       |j9                  |j*                  d          |j;                  |         t        j                  ||      S )z
    Fuses convolution/BN and linear/BN layers for inference purposes.
    Will deepcopy your model by default, but can modify the model inplace as well.
    r   r   )nnConv1dBatchNorm1dConv2dBatchNorm2dConv3dBatchNorm3dLinearcopydeepcopyr.   torchr/   GraphModulesymbolic_tracedictnamed_modulesgraphr4   r   r+   r,   usersr   track_running_statsr   r   r   replace_all_uses_with
erase_node)r=   inplaceno_tracepatternsfx_modelr(   	new_graphr&   r'   first_layerbnfused_layers               r#   r   r   M   s    
BNN#	BNN#	BNN#	BNN#	H e$:eUXX-A-AB$$U+8))+,Ghnn-I +OO 	+D%gtW=tyy|))*Q.%diil&9&9:T[[)--1:"))RYY		!BB"3K"DK"5k2"FK#DIIaL';G**499Q<8$$T*	++" >>(I..r%   c                     t        j                  |       } G d dt        j                   j                        } ||      j	                         S )z5
    Removes all dropout layers from the module.
    c                   D     e Zd Zdedeedf   deeef   def fdZ	 xZ
S )&remove_dropout.<locals>.DropoutRemoverr   r,   .kwargsr   c                     t        | j                  |   t        j                        r*t	        |      dk7  rt        dt	        |             |d   S t        |   |||      S )Nr   z Expected 1 arg for Dropout, got r   )r.   
submodulesr?   Dropoutr+   r:   superr*   )selfr   r,   r^   	__class__s       r#   r*   z2remove_dropout.<locals>.DropoutRemover.call_module|   s\     $//&12::>t9>(+KCPTI;)WXXAww*64@@r%   )__name__
__module____qualname__r
   tupler	   rL   r2   r   r*   __classcell__)rd   s   @r#   DropoutRemoverr]   {   sE    	A 	A(-hm(<	AFJ3PS8n	A	A 	Ar%   rj   )r/   rK   rI   Transformer	transform)r=   rV   rj   s      r#   r   r   u   sB       'H	A-- 	A (#--//r%   orig_moduler4   inputsoutputsc                 Z  	 t        j                         }i 	|D ]"  }|j                  |j                        }|	|<   $ |D ]  }|j	                  |	fd      }|	|<    |j                  |D cg c]  }	|   	 c}       |j                          t        j                  | |      S c c}w )z
    Given lists of nodes from an existing graph that represent a subgraph, returns a submodule that executes that subgraph.
    c                     |    S N )xenvs    r#   <lambda>z"extract_subgraph.<locals>.<lambda>   s    s1v r%   )r/   Graphplaceholderr"   	node_copyoutputlintrJ   )
rm   r4   rn   ro   rW   inputnew_noder'   rz   ru   s
            @r#   r   r      s     
I"$C ((4E
  &&t-=>D	 8fc&k89NN>>+y11 9s   /B(c                 ,    t        j                  |       S rr   )	th_mkldnnMkldnnBatchNorm)a_s     r#   rv   rv      s    !:!:1!= r%   c                    i }| D ]  }|j                   dk(  st        |j                  t              s!t	        dt        |j                               ||j                     }t        |      t        v sot        t        |         |t        j                        }t        |t        j                        st	        dt        |             t        j                  |      ||<   t        |||        |S )z
    For each node, if it's a module that can be preconverted into MKLDNN,
    then we do so and create a mapping to allow us to convert from the MKLDNN
    version of the module to the original.
    r*   r9   zExpected nn.Module, got )r1   r.   r   r2   r:   r3   
mkldnn_maprI   floatr?   ModulerG   rH   r   )r4   r(   old_modulesr'   
cur_moduler7   s         r#   r   r      s     /1K ?77m#dkk3/$'@dkkAR@S%TUU -JJ:-'Z(89*ekkR
!*bii8(+CDDTCU)VWW*.--
*CJ'#D':>? r%   r   c                     | D ]q  }|j                   dk(  st        |j                  t              s!t	        dt        |j                               ||j                     }||v sbt        ||||          s y)za
    Maps each module that's been changed with `modules_to_mkldnn` back to its
    original.
    r*   r9   N)r1   r.   r   r2   r:   r3   r   )r4   r(   r   r'   r   s        r#   r   r      ss      L77m#dkk3/$'@dkkAR@S%TUU -J[(#D';z3JKLr%   c                   ,    e Zd Zdej                  fdZy)r   fx_graphc                 <    || _         g | _        g | _        g | _        y rr   )r   r4   start_nodes	end_nodes)rc   r   s     r#   __init__zMklSubgraph.__init__   s     $&
*,(*r%   N)re   rf   rg   r/   rw   r   rs   r%   r#   r   r      s    + +r%   r   c                 D     dddt         dt        f fd}|S )aW  
    This generates a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by running it with the example_inputs.

    Example usage:
        heuristic = gen_mkl_autotuner(example_inputs, iters=10)
        fast_model = optimization.optimize_for_inference(model, heuristic)
    NrN   r   c                    | j                   }
F| j                  j                  
| j                  j                  t	        
      j                  	       |D cg c]!  }t        j                  |j                        # c}t        t        t        j                     | j                  D cg c]  }|j                  d    c}      }t        
| j                   ||      fd} |fd      }t#        j$                  j                   t'        j)                                       |fd      }||k  S c c}w c c}w )Nr   c                     t              D ]	  } |          t        j                         }t              D ]	  } |          t        j                         |z
  S rr   )rangetime)fr   beginiterswarmups      r#   	benchmarkz?gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.benchmark  sO    6] IIKE5\ 99;&&r%   c                       D  cg c]  } | j                          c}  D  cg c]  } | j                          c} S c c} w c c} w rr   )	to_mkldnnto_dense)isample_inputs	submodules    r#   rv   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>
  s<    &/1WA!++-1W&X!"

 1Ws	   ?Ac                         S rr   rs   )r   r   s   r#   rv   z>gen_mkl_autotuner.<locals>.use_mkl_heuristic.<locals>.<lambda>  s    	=(A r%   )r   r   owning_moduler   r   	propagaterI   randnshaper   listr/   r0   r   r,   r   r4   r   rN   rL   rM   )rN   input_nodesr'   output_argsr   mkl_timeno_mkl_timer   r   example_inputsrV   r   r   r   s          @@r#   use_mkl_heuristicz,gen_mkl_autotuner.<locals>.use_mkl_heuristic   s   ''~~33H..44Kh)).9=HITTZZ0I4=EOO*TD499Q<*TU$Xu{{KU		' 
 	OO!!((*+		
   AB+%%3 J*Ts   &D>.E
)r   bool)r   r   r   r   rV   r   s   ``` @@r#   r   r      s0     HK &  &  &  &D r%   rN   c                 2    t        | j                        dkD  S )z
    This is a heuristic that can be passed into `optimize_for_inference` that
    determines whether a subgraph should be run in MKL by checking if there
    are more than 2 nodes in it
       )r+   r4   )rN   s    r#   r   r     s     u{{ar%   c                   >    e Zd Zd ZdefdZdedefdZdedefdZy	)
r   c                 0    d g|z  | _         dg|z  | _        y )Nr   r!   size)rc   ns     r#   r   zUnionFind.__init__%  s    ,06A: !sQw	r%   vc                 @    || j                   |<   d| j                  |<   y )Nr   r   )rc   r   s     r#   make_setzUnionFind.make_set)  s    A		!r%   r   c                     | j                   |   }||k(  r|S |t        d      | j                  |      | j                   |<   t        t        | j                   |         S )NzParent is None)r!   r:   findr   int)rc   r   pars      r#   r   zUnionFind.find-  sV    kk!n8H; !1223ACQ((r%   r   bc                    | j                  |      | j                  |      }}||k(  r|S | j                  |   | j                  |   k  r||}}|| j                  |<   | j                  |xx   | j                  |   z  cc<   y rr   )r   r   r!   )rc   r   r   s      r#   joinzUnionFind.join6  so    yy|TYYq\16H99Q<$))A,&aqAA		!		!$r%   N)re   rf   rg   r   r   r   r   r   rs   r%   r#   r   r   $  s9    '# )c )c )%c %c %r%   r   pass_configtracerc                    dddt         id}|i }|j                  |       |d   rt        |       } |d   rt        |       } |d   du r| S t	        |d   t
              st        d	      d|d   vrt        d
      |d   d   } |       }|j                  t        j                  |             t        j                  |j                         t        | j                               } G d dt              }t        j                         D ]  }|j"                  }	|j$                  dk(  r||j&                     }
t)        |
      t*        v r|j,                  }	t/        |
j1                         d      }||j2                  t4        j6                  k7  rt9        d      |j:                  t5        j:                  d      k7  rWt9        d      |j$                  dk(  r=|j&                  t*        v r|j,                  }	n|j&                  t<        v r|j>                  }	|	|j"                  k7  s|	|j>                  k(  rtA        d |jB                  D              sFjE                  |      5  t        jF                  |jB                  fd      }ddd       tI        tJ        t        jL                  jN                           |_!        jQ                  |      5  jS                  dd|f      }|jU                  |       |f|_!        ddd        tW        t        j                         |      }|_,        j                   D ]  }|j$                  dk(  s|j&                  dk(  s#|jB                  d   }t        |jZ                        }|D ]D  }|j$                  dk(  s|j&                  dk(  s#|jU                  |       j]                  |       F t_        |jZ                        dk(  sj]                  |        t_        j                         }ta        |      fd}tc        j                         D ]7  \  }}|j$                  dk(  r(|j&                  dk(  r||_2        jg                  |       >|j$                  dk(  rJ|j&                  dk(  r; ||jB                  d         t9        d       ||jB                  d         |_4        |jj                  D cg c],  }t	        |t        jl                        r ||       ||      . }}t_        |      dk(  rtA        d |D              rt9        d      to        |      }|d   |_8        |dd D ]  }js                  |d   |        : tu        fd      }j                   D ]  }tw        |d      r7|jy                  |jp                           j                   j{                  |       tw        |d      r7|jy                  |jd                           j|                  j{                  |       tw        |d       s|jy                  |jh                           j~                  j{                  |        |j                         D ]q  } ||      r|j|                  |j~                  z   D ]3  }|jB                  d   }|jU                  |       j]                  |       5 t        |j                   ||       s d}j                   D ]&  }|j&                  dk(  s|j&                  dk(  s"|dz  }( t        j                  t              j                  d!|       j                          t        j                  |       }|S # 1 sw Y   xY w# 1 sw Y   lxY wc c}w )"a  
    Performs a set of optimization passes to optimize a model for the
    purposes of inference. Specifically, the passes that are run are:
    1. Conv/BN fusion
    2. Dropout removal
    3. MKL layout optimizations

    The third optimization takes a function `use_mkl_heuristic` that's used
    to determine whether a subgraph should be explicitly run in MKL layout.

    Note: As FX does not currently handle aliasing, this pass currently
    assumes nothing aliases. If that isn't true, use at your own risk.
    T	heuristic)conv_bn_fuser   mkldnn_layout_optimizeNr   r   r   Fz+mkldnn_layout_optimize config is not a dictz4Heuristic not found in mkldnn_layout_optimize configc                       e Zd ZdZdZdZy)*optimize_for_inference.<locals>.MklSupportr   r      N)re   rf   rg   NOYESUNKNOWNrs   r%   r#   
MklSupportr   l  s    r%   r   r*   z)this pass is only for torch.float modulescpuz!this pass is only for CPU modulescall_functionc              3   :   K   | ]  }|j                   d k(    yw)r   N)r   ).0args     r#   	<genexpr>z)optimize_for_inference.<locals>.<genexpr>  s     I3::3Is   c                 *    j                  d| f      S )Nr   )call_method)r   r   s    r#   rv   z(optimize_for_inference.<locals>.<lambda>  s    )=)=kA4)P r%   r   r   r   r   c                     t        | d      rj                  | j                        S t        | d      rj                  | j                        S y )Ncolorstart_color)hasattrr   r   r   )r   ufs    r#   	get_colorz)optimize_for_inference.<locals>.get_color  s@    1g77177##1m$771==))r%   z!Expected color for to_dense inputc              3   $   K   | ]  }|d u  
 y wrr   rs   )r   r   s     r#   r   z)optimize_for_inference.<locals>.<genexpr>  s     1191s   zFound None in cur_colorsr   c                      t               S rr   )r   )r   s   r#   rv   z(optimize_for_inference.<locals>.<lambda>  s    H@U r%   r   r   	end_colorzmkldnn conversions: %s)Gr   updater   r   r.   rL   RuntimeErrortracerG   rH   r/   rJ   rootrM   r   r   r4   r   r1   r   r3   mkldnn_supportedr   next
parametersdtyperI   r   r:   devicemkldnn_supported_unknownr   anyr,   inserting_beforemap_argr   rh   r'   r	   inserting_aftercreate_noderQ   r   r   rO   rR   r+   r   	enumerater   r   r   all_input_nodesr0   sortedr   r   r   r   r   appendr   r   valuesr   logging	getLoggerre   infor{   )r=   r   r   default_pass_configr   
cur_tracerr(   r   r'   supports_mkldnnr   sample_parametermkldnn_argsdense_xr   prv_noderO   user	num_nodesr   cur_idxr   
cur_colorsother_colormkldnn_graphsrN   prvmkldnn_conversionsresultr   r   s                                @@r#   r   r   @  s   & #."?
 {+>*U+,u%34=)*BCTJHII-.FGGQRR+,DEkRJe 45HNN:??H-$()<)<)>$?GT  X^^$ "'$--77m# -JJ#33",..#'
(=(=(?#F #/'--<,G  (..%,,u2EE,-PQQWW'{{..",.. 88","4"4jmm+*"4"44ItyyII**40  jjIIP
 U277#3#34kBDI))$/ '"..}j4'R**73 $w' '?"'J $D$8'BK&H  	*77m#z(Ayy|H$E .77m+{0J..x8''-. 4::!###D)	* HNN#I	9	B$ #8>>2 477m#{(B&DKK WW%$++*C1&.$%HII&tyy|4DN --a)Q<+ !J  :!#1j11$%?@@
+J#ADJ)!"~ 4
1{34-42 -88U,VM J4!"''$**-.44;;DA4'"''$"2"234@@GGM4%"''$..12<<CCDIJ %%' = '))EOO; *iil**3/##D)* %++w<=  $;;+%
)B!#$ h$$%=?QRMMO^^E8,FMK ' 'fs   $$]".]/1]<"],	/]9	)FF)
   r   )LrG   r   operatorr   collectionsr   collections.abcr   enumr   typingr   r   r   rI   torch.fxr/   torch.nnr?   torch.nn.functional
functionalFtorch.utils.mkldnnutilsmkldnnr   torch.fx.noder	   r
   torch.fx.passes.shape_propr   torch.nn.utils.fusionr   r   __all__r2   rh   r$   r3   r0   rL   r   r   r   r   r   r   r   rB   rF   rC   ReLU	MaxPool2d	AvgPool2dAdaptiveAvgPool2drelu	transposesigmoid
avg_pool2dadaptive_avg_pool2dr   addmulr   MkldnnConv2dMkldnnLinearr   r   r   r   r   r   r   r   Tracerr   rs   r%   r#   <module>r#     s       # $  & &      & & * 0 H - -sCx -d^#%7759#s(^(4
''4 cN48=4%/ %/588?? %/P0")) 0		 0(22=2 M2 "'']	2. IIIINNGGLLLL	JJ	OO	MMFFLL & %LL(,,7 IIy%%IIy%%NN=
T"''] T#ryy.5I ,L=L#ryy.!L bii*+L$+ +.b +  $  % %< -1 iir88??r$sCx.)r Or XX__	rr%   