diff --git a/README.md b/README.md
index 4532d97..2c49a15 100644
--- a/README.md
+++ b/README.md
@@ -233,6 +233,10 @@ If you want to manage in different areas Loras for the 1.3B model and the 14B as
 - loras/1.3B
 - loras/14B
 
+You can also put all the loras in the same place by launching the app with the following command line (*path* is the path to the shared loras directory):
+```
+python wgp.py --lora-dir path --lora-dir-i2v path
+```
 For each activated Lora, you may specify a *multiplier* that is one float number that corresponds to its weight (default is 1.0). The multipliers for each Lora should be separated by a space character or a carriage return. For instance:\
 *1.2 0.8* means that the first lora will have a 1.2 multiplier and the second one will have 0.8.
diff --git a/wan/modules/attention.py b/wan/modules/attention.py
index 868edbe..f70a85b 100644
--- a/wan/modules/attention.py
+++ b/wan/modules/attention.py
@@ -56,10 +56,10 @@ def sageattn_wrapper(
         attention_length
     ):
     q,k, v = qkv_list
-    padding_length = q.shape[0] -attention_length
-    q = q[:attention_length, :, : ]
-    k = k[:attention_length, :, : ]
-    v = v[:attention_length, :, : ]
+    padding_length = q.shape[1] - attention_length
+    q = q[:, :attention_length, :, :]
+    k = k[:, :attention_length, :, :]
+    v = v[:, :attention_length, :, :]
     if True:
         qkv_list = [q,k,v]
         del q, k ,v
diff --git a/wan/modules/model.py b/wan/modules/model.py
index 1fb03e1..77292e2 100644
--- a/wan/modules/model.py
+++ b/wan/modules/model.py
@@ -797,11 +797,12 @@ class WanModel(ModelMixin, ConfigMixin):
 
     def compute_teacache_threshold(self, start_step, timesteps = None, speed_factor =0):
-        rescale_func = np.poly1d(self.coefficients)
+        modulation_dtype = self.time_projection[1].weight.dtype
+        rescale_func = np.poly1d(self.coefficients)
         e_list = []
         for t in timesteps:
             t = torch.stack([t])
-            time_emb = self.time_embedding( sinusoidal_embedding_1d(self.freq_dim, t.flatten()).to(self.patch_embedding.weight.dtype) ) # b, dim
+            time_emb = self.time_embedding(sinusoidal_embedding_1d(self.freq_dim, t.flatten()).to(modulation_dtype))  # b, dim
             e_list.append(time_emb)
         best_deltas = None
         best_threshold = 0.01
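
Note on the attention.py hunk: the slicing moves from dim 0 to dim 1 because q/k/v now arrive batch-first, shaped (batch, seq_len, heads, head_dim), so the padding tokens to strip live along the sequence dimension. Below is a minimal sketch of that layout assumption; it uses PyTorch's built-in scaled_dot_product_attention as a stand-in for the sageattn kernel, and the function name and shapes are illustrative, not from the patch:

```
import torch
import torch.nn.functional as F

# Sketch only: q/k/v are assumed batch-first, (batch, seq_len, heads, head_dim),
# matching the layout the fixed wrapper expects. Padding tokens sit along dim 1,
# so they are trimmed there before attention and zero-padded back afterwards.
def attend_without_padding(q, k, v, attention_length):
    padding_length = q.shape[1] - attention_length
    q, k, v = (t[:, :attention_length] for t in (q, k, v))
    # (B, L, H, D) -> (B, H, L, D) for PyTorch's SDPA, then back
    out = F.scaled_dot_product_attention(
        q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
    ).transpose(1, 2)
    if padding_length > 0:
        out = F.pad(out, (0, 0, 0, 0, 0, padding_length))  # re-pad dim 1 with zeros
    return out

# 4 real tokens plus 2 padding tokens; the output keeps the padded length of 6
q = k = v = torch.randn(1, 6, 8, 64)
print(attend_without_padding(q, k, v, attention_length=4).shape)  # torch.Size([1, 6, 8, 64])
```

Trimming before the kernel keeps the attention cost proportional to the real token count; the zeros are only reattached so downstream reshapes still see the padded length.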
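Note on the model.py hunk: compute_teacache_threshold previously cast the sinusoidal embedding to patch_embedding's dtype, which drifts out of sync when that layer is stored in a different precision (e.g. quantized) than the time-modulation path; reading the dtype off time_projection[1] keeps the cast consistent with the weights that actually consume the embedding. A minimal sketch of the failure mode, with made-up module shapes (these are stand-ins, not the real WanModel):

```
import torch
import torch.nn as nn

# Stand-in modules with deliberately mismatched dtypes (hypothetical shapes).
patch_embedding = nn.Conv3d(16, 128, kernel_size=2).to(torch.float16)
time_projection = nn.Sequential(nn.SiLU(), nn.Linear(128, 768)).to(torch.bfloat16)
time_embedding = nn.Sequential(nn.Linear(256, 128), nn.SiLU(), nn.Linear(128, 128)).to(torch.bfloat16)

sinusoid = torch.randn(1, 256)  # stand-in for sinusoidal_embedding_1d(...)

# Old behaviour: the cast follows patch_embedding and lands in float16, so the
# bfloat16 time_embedding MLP is fed the wrong precision and raises a dtype error:
# time_emb = time_embedding(sinusoid.to(patch_embedding.weight.dtype))

# Fixed behaviour: key the cast off the modulation weights instead.
modulation_dtype = time_projection[1].weight.dtype  # torch.bfloat16
time_emb = time_embedding(sinusoid.to(modulation_dtype))
print(time_emb.dtype)  # torch.bfloat16
```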