AntonV HF Staff commited on
Commit
45db6f4
·
1 Parent(s): e8134cb

rm rf remote code and add notice

Browse files
Files changed (1) hide show
  1. README.md +11 -9
README.md CHANGED
@@ -2619,6 +2619,8 @@ language:
2619
 
2620
  For example, if you are implementing a RAG application, you embed your documents as `search_document: <text here>` and embed your user queries as `search_query: <text here>`.
2621
 
 
 
2622
  ## Task instruction prefixes
2623
 
2624
  ### `search_document`
@@ -2630,7 +2632,7 @@ This prefix is used for embedding texts as documents, for example as documents f
2630
  ```python
2631
  from sentence_transformers import SentenceTransformer
2632
 
2633
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2634
  sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
2635
  embeddings = model.encode(sentences)
2636
  print(embeddings)
@@ -2645,7 +2647,7 @@ This prefix is used for embedding texts as questions that documents from a datas
2645
  ```python
2646
  from sentence_transformers import SentenceTransformer
2647
 
2648
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2649
  sentences = ['search_query: Who is Laurens van Der Maaten?']
2650
  embeddings = model.encode(sentences)
2651
  print(embeddings)
@@ -2660,7 +2662,7 @@ This prefix is used for embedding texts in order to group them into clusters, di
2660
  ```python
2661
  from sentence_transformers import SentenceTransformer
2662
 
2663
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2664
  sentences = ['clustering: the quick brown fox']
2665
  embeddings = model.encode(sentences)
2666
  print(embeddings)
@@ -2675,7 +2677,7 @@ This prefix is used for embedding texts into vectors that will be used as featur
2675
  ```python
2676
  from sentence_transformers import SentenceTransformer
2677
 
2678
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2679
  sentences = ['classification: the quick brown fox']
2680
  embeddings = model.encode(sentences)
2681
  print(embeddings)
@@ -2689,7 +2691,7 @@ from sentence_transformers import SentenceTransformer
2689
 
2690
  matryoshka_dim = 512
2691
 
2692
- model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
2693
  sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2694
  embeddings = model.encode(sentences, convert_to_tensor=True)
2695
  embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],))
@@ -2713,7 +2715,7 @@ def mean_pooling(model_output, attention_mask):
2713
  sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2714
 
2715
  tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2716
- model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, safe_serialization=True)
2717
  model.eval()
2718
 
2719
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
@@ -2736,9 +2738,9 @@ The model natively supports scaling of the sequence length past 2048 tokens. To
2736
  - tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2737
  + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
2738
 
2739
-
2740
- - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
2741
- + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True, rotary_scaling_factor=2)
2742
  ```
2743
 
2744
  ### Transformers.js
 
2619
 
2620
  For example, if you are implementing a RAG application, you embed your documents as `search_document: <text here>` and embed your user queries as `search_query: <text here>`.
2621
 
2622
+ **Notice**: As of transformers v5.5.0 and sentence-transformers v5.3.0, `trust_remote_code=True` is no longer necessary. Currently, this applies only to the text-only model series.
2623
+
2624
  ## Task instruction prefixes
2625
 
2626
  ### `search_document`
 
2632
  ```python
2633
  from sentence_transformers import SentenceTransformer
2634
 
2635
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5")
2636
  sentences = ['search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten']
2637
  embeddings = model.encode(sentences)
2638
  print(embeddings)
 
2647
  ```python
2648
  from sentence_transformers import SentenceTransformer
2649
 
2650
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5")
2651
  sentences = ['search_query: Who is Laurens van Der Maaten?']
2652
  embeddings = model.encode(sentences)
2653
  print(embeddings)
 
2662
  ```python
2663
  from sentence_transformers import SentenceTransformer
2664
 
2665
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5")
2666
  sentences = ['clustering: the quick brown fox']
2667
  embeddings = model.encode(sentences)
2668
  print(embeddings)
 
2677
  ```python
2678
  from sentence_transformers import SentenceTransformer
2679
 
2680
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5")
2681
  sentences = ['classification: the quick brown fox']
2682
  embeddings = model.encode(sentences)
2683
  print(embeddings)
 
2691
 
2692
  matryoshka_dim = 512
2693
 
2694
+ model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5")
2695
  sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2696
  embeddings = model.encode(sentences, convert_to_tensor=True)
2697
  embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],))
 
2715
  sentences = ['search_query: What is TSNE?', 'search_query: Who is Laurens van der Maaten?']
2716
 
2717
  tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2718
+ model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
2719
  model.eval()
2720
 
2721
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
 
2738
  - tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
2739
  + tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', model_max_length=8192)
2740
 
2741
+ - model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
2742
+ + rope_parameters = {"rope_theta": 1000.0, "rope_type": "dynamic", "factor": 2.0}
2743
+ + model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', rope_parameters=rope_parameters)
2744
  ```
2745
 
2746
  ### Transformers.js