diff --git a/extensions/perplexity_colors/script.py b/extensions/perplexity_colors/script.py
index 2a986ac4..849e4e63 100644
--- a/extensions/perplexity_colors/script.py
+++ b/extensions/perplexity_colors/script.py
@@ -1,9 +1,14 @@
 import time
+import html
+import functools
+import re
+
 import gradio
 import numpy as np
 import torch
 from transformers import LogitsProcessor
+import colorsys
 
 from modules import html_generator, shared
 
@@ -28,7 +33,7 @@ class PerplexityLogits(LogitsProcessor):
         self.verbose = verbose
 
     def __call__(self, input_ids, scores):
-        # t0 = time.time()
+        #t0 = time.time()
         probs = torch.softmax(scores, dim=-1, dtype=torch.float)
         log_probs = torch.nan_to_num(torch.log(probs))  # Note: This converts the nan from log(0) to 0; probs*log_probs then makes that term 0, so it does not affect the perplexity.
         entropy = -torch.sum(probs * log_probs)
@@ -42,9 +47,8 @@ class PerplexityLogits(LogitsProcessor):
         if len(self.selected_probs) > 0:  # Is the selected token in the top tokens?
             if self.verbose:
-                print('Probs: Token after', shared.tokenizer.decode(last_token_id))
-                print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
-                print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
+                print(shared.tokenizer.decode(last_token_id), [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]],
+                      [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
             if last_token_id in self.top_token_ids_list[-1][0]:
                 idx = self.top_token_ids_list[-1][0].index(last_token_id)
                 self.selected_probs.append(self.top_probs_list[-1][0][idx])
@@ -60,7 +64,7 @@ class PerplexityLogits(LogitsProcessor):
             pplbar = "-"
             if not np.isnan(perplexity):
                 pplbar = "*" * round(perplexity)
-            print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")
+            print(f"PPL for token after {shared.tokenizer.decode(last_token_id)}: {perplexity:.2f} {pplbar}")
 
         # Get top 5 probabilities
         top_tokens_and_probs = torch.topk(probs, 5)
@@ -73,14 +77,15 @@ class PerplexityLogits(LogitsProcessor):
         probs = probs.cpu().numpy().flatten()
         self.last_probs = probs  # Need to keep this as a reference for top probs
 
-        # t1 = time.time()
-        # print(f"PPL Processor: {(t1-t0):.3f} s")
+        #t1 = time.time()
+        #print(f"PPL Processor: {(t1-t0):.3f} s")  # About 1 ms, though occasionally up to around 100 ms, not sure why...
 
         # Doesn't actually modify the logits!
         return scores
 
 
 # Stores the perplexity and top probabilities
+# global ppl_logits_processor
 ppl_logits_processor = None
 
@@ -93,9 +98,9 @@ def logits_processor_modifier(logits_processor_list, input_ids):
 
 def output_modifier(text):
     global ppl_logits_processor
-    # t0 = time.time()
+    #t0 = time.time()
 
-    if not params['active']:
+    if not params['active'] or ppl_logits_processor is None:
         return text
 
     # TODO: It's probably more efficient to do this above rather than modifying all these lists
@@ -111,110 +116,147 @@ def output_modifier(text):
     end_part = '</div>' if params['probability_dropdown'] else '</span>'  # Helps with finding the index after replacing part of the text.
 
-    i = 0
-    for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
+    # Initial space added to deal with some tokenizers...
+    # Used to find where the message started generating, for working with "continue" generations
+    # Doesn't work for longer messages... Not sure how I should handle this
+    full_msg = shared.tokenizer.decode([token_id for token_id in gen_token_ids[:-1]]).strip()
+    # Space at the beginning to account for tokenization spaces...
+    text = ' ' + html.unescape(text)
+    # There was an issue with tab lengths being off by one...
+    # Seems like it might be model-dependent...
+    #text = re.sub(r'( {3,})', r'\1 ', text)
+    # Subtracting 2 to hopefully help with the tokenization spaces and continue issues,
+    # though it's possible it could overwrite the previous token if it's the same in the last 2 chars
+    i = text.find(full_msg) - 2
+    if i < 0:
+        # Backup: try removing the extra whitespace (needed for continue)
+        i = text.find(full_msg.strip()) - 2
+        if i < 0:
+            i = 0
+
+    #i = 0
+    # Add the token index for the ability to regenerate from there
+    nonwhitespace_token_found = False
+    for index, token, prob, ppl, top_tokens, top_probs in zip(range(len(gen_tokens)), gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
+        # Somehow this works without issues, but not sure how...
+        if not nonwhitespace_token_found and token.strip() == '':
+            #print('Ignoring initial whitespace token...')
+            continue
+        nonwhitespace_token_found = True
+        max_prob = top_probs[0][0]
         color = 'ffffff'
         if params['color_by_probability'] and params['color_by_perplexity']:
-            color = probability_perplexity_color_scale(prob, ppl)
+            color = probability_perplexity_color_scale(prob, max_prob, ppl)
         elif params['color_by_perplexity']:
             color = perplexity_color_scale(ppl)
         elif params['color_by_probability']:
             color = probability_color_scale(prob)
-        if token in text[i:]:
+        if token.strip() in text[i:]:
             if params['probability_dropdown']:
-                text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], ppl), 1)
+                text = text[:i] + text[i:].replace(token.replace('\n', ''), add_dropdown_html(token, index, color, top_tokens, top_probs[0], ppl), 1)
             else:
-                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
+                text = text[:i] + text[i:].replace(token.replace('\n', ''), add_color_html(token, color), 1)
+
+            # This might be slightly inefficient
             i += text[i:].find(end_part) + len(end_part)
+        else:
+            print('Missing token:', token, '...', text[i:i + 20])
 
     # Use full perplexity list for calculating the average here.
-    print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
-    # t1 = time.time()
-    # print(f"Modifier: {(t1-t0):.3f} s")
+    # Fix issue with mean of empty slice
+    if len(ppl_logits_processor.perplexities_list) > 1:
+        print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
+    #t1 = time.time()
+    #print(f"Output modifier: {(t1-t0):.3f} s")  # About 50 ms
 
-    return text
+    return text.strip()  # Remove the extra beginning whitespace that some tokenizers add
 
 
 def probability_color_scale(prob):
     '''
     Green-yellow-red color scale
     '''
+    # hue (0.0 = red, 0.33 = green)
+    # saturation (0.0 = gray / white, 1.0 = normal, just leave at 1.0)
+    # brightness (0.0 = black, 1.0 = brightest, use something in between for better readability if you want...)
+    hue = prob * 0.33
+    rv, gv, bv = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
+    # to hex
+    hex_col = f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"
 
-    rv = 0
-    gv = 0
-    if prob <= 0.5:
-        rv = 'ff'
-        gv = hex(int(255 * prob * 2))[2:]
-        if len(gv) < 2:
-            gv = '0' * (2 - len(gv)) + gv
-    else:
-        rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
-        gv = 'ff'
-        if len(rv) < 2:
-            rv = '0' * (2 - len(rv)) + rv
-
-    return rv + gv + '00'
+    return hex_col
 
 
 def perplexity_color_scale(ppl):
     '''
     Red component only, white for 0 perplexity (sorry if you're not in dark mode)
     '''
-    value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
-    if len(value) < 2:
-        value = '0' * (2 - len(value)) + value
+    # hue (0.0 = red)
+    # saturation (0.0 = white, 1.0 = red)
+    # brightness (0.0 = black, 1.0 = full brightness)
+    # scale saturation from white to red the higher the perplexity
 
-    return 'ff' + value + value
+    ppl = min(ppl, params['ppl_scale'])  # clip ppl to 0-params['ppl_scale'] for color scaling; 15 should be fine for clipping and scaling
+    sat = ppl / params['ppl_scale']
+    rv, gv, bv = colorsys.hsv_to_rgb(0.0, sat, 1.0)
+
+    # to hex
+    hex_col = f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"
+
+    return hex_col
 
 
-def probability_perplexity_color_scale(prob, ppl):
+def probability_perplexity_color_scale(prob, max_prob, ppl):
     '''
-    Green-yellow-red for probability and blue component for perplexity
+    Green-yellow-red for probability relative to the maximum for the current token, and a blue component for perplexity
     '''
-    rv = 0
-    gv = 0
-    bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
-    if len(bv) < 2:
-        bv = '0' * (2 - len(bv)) + bv
-
-    if prob <= 0.5:
-        rv = 'ff'
-        gv = hex(int(255 * prob * 2))[2:]
-        if len(gv) < 2:
-            gv = '0' * (2 - len(gv)) + gv
-    else:
-        rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
-        gv = 'ff'
-        if len(rv) < 2:
-            rv = '0' * (2 - len(rv)) + rv
-
-    return rv + gv + bv
+    hue = prob / max_prob * 0.33
+    rv, gv, _ = colorsys.hsv_to_rgb(hue, 1.0, 1.0)
+
+    ppl = min(ppl, params['ppl_scale'])  # clip ppl to 0-params['ppl_scale'] for color scaling; 15 should be fine for clipping and scaling
+    bv = ppl / params['ppl_scale']
+
+    # to hex
+    hex_col = f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"
+
+    return hex_col
 
 
 def add_color_html(token, color):
-    return f'<span style="color: #{color}">{token}</span>'
+    output = ''
+    output += f'<span style="color: #{color}">{html.escape(repr(token)[1:-1])}</span>'
+    #if '\n' in token or '\r' in token: #token.isspace():
+    #    output += '<br>'
+    return output
 
 
-# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
+# TODO: Might also need the message index for the click-to-regenerate feature to work... For now it only works in the last message, which I think is fine.
+# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history. The slowdown seems to be mostly resolved in the current version, though.
 # I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
 # Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
 # I wonder if we can also avoid using deepcopy here.
-def add_dropdown_html(token, color, top_tokens, top_probs, perplexity=0):
-    html = f'<div class="hoverable"><span style="color: #{color}">{token}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
-    for token_option, prob in zip(top_tokens, top_probs):
-        html += f'<tr><td>{token_option}</td><td>{prob:.4f}</td></tr>'
-    html += '</tbody></table></div></div>'
-    return html  # About 750 characters per token...
+def add_dropdown_html(token, index, color, top_tokens, top_probs, perplexity=0):
+    output = ''
+    output += f'<div class="hoverable" id="token_{index}"><span style="color: #{color}">{html.escape(repr(token)[1:-1])}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
+    for i in range(len(top_tokens)):
+        token_option = top_tokens[i]
+        row_class = ' class="selected"' if token_option == token else ''
+        output += f'<tr{row_class}><td id="opt_{index}_{i}">{html.escape(repr(token_option))}</td><td>{top_probs[i]:.4f}</td></tr>'
+    output += f'<tr><td>Perplexity:</td><td>{perplexity:.4f}</td></tr>'
+    output += '</tbody></table></div></div>'
+    #if '\n' in token or '\r' in token: #token.isspace():
+    #    output += '<br>'  # I imagine this will cause problems sometimes
+    return output  # About 750 characters per token...
 
 
 def custom_css():
@@ -223,8 +265,8 @@ def custom_css():
         display: none;
         position: absolute;
         z-index: 50;
-        background-color: var(--block-background-fill);
-        box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
+        background-color: var(--background-fill-secondary);
+        box-shadow: 0px 8px 16px 0px rgba(0,0,0,1.0);
         width: max-content;
         overflow: visible;
         padding: 5px;
@@ -238,7 +280,7 @@ def custom_css():
     }
 
     .dropdown-content tr.selected {
-        background-color: var(--block-label-background-fill);
+        background-color: var(--background-fill-primary);
    }
 
     .dropdown-content td {
@@ -267,21 +309,106 @@ def custom_css():
     # TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
     # However, it also makes the scrollbar disappear, which is bad.
     # The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
-    #.chat {
-    #    overflow-y: auto;
-    #}
+    .chat {
+        overflow-y: auto;
+    }
     """
 
 
+def custom_js():
+    return """
+
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
+
+// Note that this will only work as intended on the last agent message
+document.addEventListener("click", async function(event) {
+    //console.log(event.target);
+    const id = event.target.id;
+    if (id.includes("opt_")) {
+        const id_parts = id.split("_");
+        const token_index = id_parts[1];
+        const option_index = id_parts[2];
+        // Exclude the quotes and convert newlines... Not sure about the newlines though
+        // TODO: Seems like continuing generation from a newline causes problems whether you add it or not!
+        const token_string = event.target.innerHTML.substring(1, event.target.innerHTML.length - 1).replace(new RegExp(String.fromCharCode(92) + String.fromCharCode(92) + "r", "g"), '').replace(new RegExp(String.fromCharCode(92) + String.fromCharCode(92) + "n", "g"), '');
+        //console.log(token_index + ", " + option_index + ", " + token_string);
+        // Get all the previous text (I'm sure there is a more efficient way to do this)
+        var msg_text = "";
+        const msg_html = event.target.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement;
+        var msg_parts = msg_html.childNodes;
+        for (var i = 0; i < msg_parts.length; i++) {
+            var msg_part = msg_parts[i];
+            if (msg_part.nodeType === Node.ELEMENT_NODE) {
+                if (msg_part.nodeName == "DIV") {
+                    var current_token_index = msg_part.id.split("_")[1];
+                    if (current_token_index == token_index) {
+                        // Use the replacement token
+                        // TODO: We don't have access to the tokenizer here, and sometimes a space needs to be added before this token
+                        msg_text += token_string; //.replace(new RegExp(String.fromCharCode(92) + String.fromCharCode(92) + "r", "g"), '').replace(new RegExp(String.fromCharCode(92) + String.fromCharCode(92) + "n", "g"), '');
+                        break;
+                    }
+                    else {
+                        // Replace here or at the end?
+                        var text = msg_part.firstChild.innerHTML.replace(new RegExp(String.fromCharCode(92) + String.fromCharCode(92) + "r", "g"), '').replace(new RegExp(String.fromCharCode(92) + String.fromCharCode(92) + "n", "g"), '');
+                        msg_text += text;
+                    }
+                }
+                else {
+                    // Break tag (hacky workaround because the newline literal can't be parsed here)
+                    //msg_text += String.fromCharCode(10);
+                    // Do nothing???
+                }
+            }
+            else if (msg_part.nodeType === Node.TEXT_NODE) {
+                msg_text += msg_part.textContent;
+            }
+        }
+        var textbox = document.querySelector("#chat-input textarea");
+        textbox.focus();
+        textbox.value = msg_text.trimStart();  // Fix initial tokenization spaces
+        //console.log(textbox.value);
+
+        // Add some delays to make sure it's processed correctly. Without these, there's a chance the events don't go through correctly and it doesn't work.
+        // It's unknown how long this will take, and it probably depends on the size of the message...
+        // It would be better to somehow wait for gradio to update instead of waiting a fixed amount of time.
+        // Hopefully 1 second of delay before starting generation isn't unacceptable.
+        var inputEvent = new Event('input', {
+            bubbles: true,
+            cancelable: true,
+        });
+        textbox.dispatchEvent(inputEvent);
+        var changeEvent = new Event('change', {
+            bubbles: true,
+            cancelable: true,
+        });
+        textbox.dispatchEvent(changeEvent);
+        await sleep(250);
+        document.getElementById("Replace-last").click();
+        // This can take a while to execute
+        await sleep(750);
+        document.getElementById("Continue").click();
+    }
+});
+
+console.log("Custom JS for perplexity_colors loaded");
+"""
 
 
 # Monkeypatch applied to html_generator.py
 # We simply don't render markdown into HTML. We wrap everything in <pre> tags to preserve whitespace
 # formatting. If you're coloring tokens by perplexity or probability, or especially if you're using
 # the probability dropdown, you probably care more about seeing the tokens the model actually outputted
 # rather than rendering ```code blocks``` or *italics*.
+@functools.lru_cache(maxsize=4096)
 def convert_to_markdown(string):
     return '<pre>' + string + '</pre>'
 
+
+def convert_to_markdown_wrapped(string, use_cache=True):
+    if use_cache:
+        return convert_to_markdown(string)
+    return convert_to_markdown.__wrapped__(string)
+
+
+# This is still necessary for formatting to work correctly
 html_generator.convert_to_markdown = convert_to_markdown
 
@@ -298,7 +425,7 @@ def ui():
     def update_prob_dropdown_check(x):
         params.update({'probability_dropdown': x})
 
-    active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
+    active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with llama.cpp, but it does work with ExLlamav2_HF and llamacpp_HF when set up correctly.")
     color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has a larger blue component.")
     color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
     prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
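Reviewer note: the HSV-based color helpers in this diff are easy to sanity-check outside the webui. Below is a minimal standalone sketch, not code from the PR; it uses only the standard library, and PPL_SCALE stands in for params['ppl_scale'], which the diff's clipping comment suggests defaults to 15 (an assumption on my part).

import colorsys

PPL_SCALE = 15  # assumed stand-in for params['ppl_scale']

def probability_color_scale(prob):
    # hue runs from 0.0 (red) to 0.33 (green) as probability rises
    rv, gv, bv = colorsys.hsv_to_rgb(prob * 0.33, 1.0, 1.0)
    return f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"

def perplexity_color_scale(ppl):
    # saturation runs from 0.0 (white) to 1.0 (red) as ppl approaches PPL_SCALE
    sat = min(ppl, PPL_SCALE) / PPL_SCALE
    rv, gv, bv = colorsys.hsv_to_rgb(0.0, sat, 1.0)
    return f"{int(rv*255):02x}{int(gv*255):02x}{int(bv*255):02x}"

if __name__ == "__main__":
    print([probability_color_scale(p) for p in (0.0, 0.5, 1.0)])    # red, orange-yellow, green
    print([perplexity_color_scale(p) for p in (0.0, 7.5, 15.0)])    # white, pink, red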
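For context on the hook itself: PerplexityLogits works because transformers calls every registered LogitsProcessor once per generated token with the raw scores, so a processor can record probabilities without modifying them. Here is a hedged, self-contained sketch of that pattern; the TopProbsRecorder class name and the gpt2 checkpoint are illustrative, not part of the PR.

import torch
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          LogitsProcessor, LogitsProcessorList)

class TopProbsRecorder(LogitsProcessor):  # hypothetical name, mirrors PerplexityLogits
    def __init__(self, k=5):
        self.k = k
        self.records = []

    def __call__(self, input_ids, scores):
        # softmax over the vocabulary, then remember the top-k probabilities
        probs = torch.softmax(scores, dim=-1, dtype=torch.float)
        self.records.append(torch.topk(probs, self.k))
        return scores  # read-only: the logits pass through unchanged

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
recorder = TopProbsRecorder()
input_ids = tokenizer("The quick brown", return_tensors="pt").input_ids
model.generate(input_ids, max_new_tokens=5,
               logits_processor=LogitsProcessorList([recorder]))
print(f"recorded top-{recorder.k} probs for {len(recorder.records)} steps")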
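On the monkeypatch at the end: the functools.lru_cache added to convert_to_markdown keeps re-rendering an unchanged history cheap, and convert_to_markdown_wrapped reaches the undecorated function through the __wrapped__ attribute that lru_cache exposes. A minimal sketch of the pattern, with hypothetical names:

import functools

@functools.lru_cache(maxsize=4096)
def convert(string):
    return '<pre>' + string + '</pre>'

def convert_uncached(string):
    # lru_cache exposes the original function as .__wrapped__
    return convert.__wrapped__(string)

convert('hello')             # computed, then cached
convert('hello')             # served from the cache
convert_uncached('hello')    # recomputed, cache bypassed
print(convert.cache_info())  # CacheInfo(hits=1, misses=1, maxsize=4096, currsize=1)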