import { CommentRegexes, parseComments } from './parsing.js';

// For documentation on how to edit formats or add new ones, see docs/prompt.md.

/* Format a post (and optionally the start of a comment) in a format similar to
   LessWrong posts in the training data. Below is an example prompt showing a short post
   and the start of a comment reply.

Example Post Title
By Example Author
Tags: First tag/Second tag
score: 11 (5 votes)

Here's my post. What do you think?

Comments sorted by top scores.

GW comment by Example Commenter 2020-01-23T12:34:56.789Z · score: 4 (2 votes) · LW Have you considered
*/
/**
 * Build the completion prompt for a post, optionally followed by the start of a comment.
 *
 * Missing post fields fall back to defaults: title 'Untitled', author 'anonymous',
 * no tags and a karma score of 0. The prompt always ends the post section with
 * 'GW comment by ' to encourage the model to generate a comment rather than
 * continuing the post.
 *
 * @param {object} post - Post to format; reads `title`, `author`, `tags`, `karma`
 *   and `body`.
 * @param {object} [comment] - Optional start of a comment; reads `author`, `karma`
 *   and `body`, and calls `empty()` to decide whether any part was specified.
 * @returns {string} The prompt text. When `comment` is missing or `comment.empty()`
 *   is true, it stops right after 'GW comment by '.
 */
function formatPromptLWReverseEngineered(post, comment) {
  const postKarma = post.karma || 0;
  // A post without a tags array still gets a 'Tags:' line, just with nothing after it.
  const tagList = (post.tags ?? []).join('/');

  const postSection = `\
${post.title || 'Untitled'}
By ${post.author || 'anonymous'}
Tags: ${tagList}
score: ${postKarma} (${estimateVoteCount(postKarma)} votes)

${post.body}

Comments sorted by top scores.

GW comment by \
`;

  // Include the start of the comment only if the user specified parts of it.
  // Otherwise leave it out and let the model generate the comment from scratch.
  if (comment == null || comment.empty()) {
    return postSection;
  }

  // Default author name of 'anonymous' and default karma score of 0. The comment
  // is stamped with the current time in ISO 8601 form.
  const commentKarma = comment.karma || 0;
  const commentStart = [
    `${comment.author || 'anonymous'} ${new Date().toISOString()}`,
    `· score: ${commentKarma} (${estimateVoteCount(commentKarma)} votes)`,
    `· LW ${comment.body}`,
  ].join(' ');

  return postSection + commentStart;
}

// Estimate a vote count from a karma score: half of the score's magnitude, rounded
// down. Using the magnitude keeps the count non-negative for downvoted content.
function estimateVoteCount(karma) {
  return Math.floor(Math.abs(karma) / 2);
}

// Parse the completions from the API as forum comments. This aims to handle multiple
// variations in the format of the completions.

// If given, origComment may be a ForumComment object to serve as a starting point for
// the first comment of each completion. Any properties that evaluate to true will be
// preserved. Those that evaluate to false will be overwritten with the relevant
// information from the completion.

// This returns an array of ForumComment objects. 
function parseCommentsLWReverseEngineered(reply, origComment) {
  // Author name: one to three words made of letters, dots and hyphens, optionally
  // followed by a parenthesised handle. Sample inputs:
  //    GW comment by jacobjacob
  //    msalt on GW Comment byon
  //    habryka (habryka4) 2019-04-12
  const authorPattern = /(?:[cC]omment by |^ ?)([\p{Letter}.-]{1,25}(?: [\p{Letter}.-]{1,25}){0,2}?) (?:\([\w.-]{1,30}\) )?(?:on |\d{4}-\d{2}-\d{2}|· )/du;

  // Karma score: the digits after "score" (the colon is optional). Sample input:
  //    score: 44
  const scorePattern = /score:? (\d+)/d;

  // Comment text after a "· XX" marker, where XX is two uppercase letters. The first
  // alternative requires the text to open with 30 characters that are not "·"; the
  // second accepts any text. Sample inputs:
  //    · LW There's definitely some value
  //    · LW · GW It's even closer to
  const bodyPattern = /· [A-Z]{2}\s*([^·]{30}.*)|· [A-Z]{2}\s*(.*)/ds;

  // All three patterns carry the `d` flag, so matches expose capture-group indices;
  // presumably parseComments relies on those positions — confirm in parsing.js.
  return parseComments(
    reply,
    origComment,
    new CommentRegexes(authorPattern, scorePattern, bodyPattern),
  );
}

// Named exports of this module: the prompt formatter and the completion parser.
export {
  formatPromptLWReverseEngineered,
  parseCommentsLWReverseEngineered,
};
