import { ForumComment } from '../formProcessing.js';

class CommentRegexes {
  // Bundles the three regular expressions used to pick apart a comment: one for the
  // author, one for the score, and one for the body.

  // Note: every regular expression should be created with the `d` flag, since
  // parseComments() relies on the match indices that flag provides.
  author;
  score;
  body;

  // All three regexes are required; an Error is thrown for the first one (in the order
  // author, score, body) that is `undefined`.
  constructor(author, score, body) {
    const requiredArgs = [
      [author, 'Must specify author regex'],
      [score, 'Must specify score regex'],
      [body, 'Must specify body regex'],
    ];
    for (const [value, message] of requiredArgs) {
      if (value === undefined) {
        throw new Error(message);
      }
    }
    Object.assign(this, { author, score, body });
  }
}

// Minimal fallback parser: every completion from the API becomes the body of its own
// comment, verbatim. No author or score is extracted, and fields prepopulated by the
// user are not taken into account. Still better than nothing!

// Returns an array of ForumComment objects, one per completion, in the same order.
function parseCommentsBasic(reply) {
  return [...reply.completions].map((completionText) => {
    const wrapped = new ForumComment();
    wrapped.body = completionText;
    return wrapped;
  });
}

// Parse the completions from the API as forum comments. This aims to handle multiple
// variations in the format of the completions, and a single completion may yield
// several comments.

// Parameters:
//  - reply: object with an iterable `completions` (strings) and an `id`. Each returned
//    comment gets the key `<reply.id>-<its index in the returned array>`.
//  - origComment: optional ForumComment-like object copied onto the first comment of
//    each completion as a starting point. Its body is kept as a prefix of the parsed
//    body, and its karma is kept unless it is NaN. An author match always starts a
//    fresh comment, so the seed only applies to text before the first author match.
//  - regexes: object with `author`, `score` and `body` regular expressions (such as a
//    CommentRegexes). Each must have the `d` flag, because match positions are read
//    from `match.indices`. For every regex, the first capturing group that matched a
//    non-empty string is used, falling back to the whole match.

// This returns an array of ForumComment objects. Comments whose cleaned-up body is
// shorter than 10 characters are dropped.
function parseComments(reply, origComment, regexes) {
  const comments = [];

  // Append `text` to the comment's body, clean the body up, and keep the comment only
  // if the resulting body isn't too short.
  const finishComment = (comment, text) => {
    comment.body = trimCommentBody((comment.body || '').concat(text));
    if (comment.body.length >= 10) {
      comment.key = [reply.id, comments.length].join('-');
      comments.push(comment);
    }
  };

  for (const compl of reply.completions) {
    // Seed the first comment of each completion with the starting point
    let comment = Object.assign(new ForumComment(), origComment);
    // A completion could contain multiple comments, so use a loop to find all of them
    let remaining = compl;
    let advanced = true;
    while (remaining.length && advanced) {
      const lengthBefore = remaining.length;

      // Author is usually first, so start by checking for that
      const authorMatch = execFromStart(regexes.author, remaining);
      if (authorMatch) {
        // Finding an author indicates this is the start of a new comment. The text
        // before the match belongs to the existing comment, so attach it there before
        // starting the new one.
        finishComment(comment, remaining.substring(0, authorMatch.indices[0][0]));
        comment = new ForumComment();
        comment.author = authorMatch[firstNonEmptyGroup(authorMatch)];
        remaining = remaining.substring(authorMatch.indices[0][1]);
      }

      // Next check for the score
      const scoreMatch = execFromStart(regexes.score, remaining);
      if (scoreMatch) {
        // Only fill in the score if the comment doesn't have one yet.
        // NOTE(review): this presumes ForumComment initializes `karma` to NaN — confirm.
        if (Number.isNaN(comment.karma)) {
          comment.karma = Number.parseInt(scoreMatch[firstNonEmptyGroup(scoreMatch)], 10);
        }
        remaining = remaining.substring(scoreMatch.indices[0][1]);
      }

      // Finally, check for the body. The length of the body is unknown, and we don't
      // want it to swallow up remaining comments in the line. So we cut off any
      // characters at the start which aren't part of the body. But we don't set the
      // body itself yet. We'll wait until we see the start of a new comment (or reach
      // the end of the completion).
      const bodyMatch = execFromStart(regexes.body, remaining);
      if (bodyMatch) {
        remaining = remaining.substring(bodyMatch.indices[firstNonEmptyGroup(bodyMatch)][0]);
      }

      // Only keep looping if something was actually consumed. A match that removes
      // nothing (e.g. a body that already starts at index 0) would match identically
      // on the next pass and loop forever.
      advanced = remaining.length < lengthBefore;
    }
    // Now that we're at the end of the completion, include any remaining portion in the
    // body of the last comment.
    finishComment(comment, remaining);
  }

  return comments;
}

// Run `regex` against `text`, always searching from the start of the string. Regexes
// with the `g` or `y` flag remember `lastIndex` between calls, which would otherwise
// make the result depend on whichever string was searched previously.
function execFromStart(regex, text) {
  if (regex.global || regex.sticky) {
    regex.lastIndex = 0;
  }
  return regex.exec(text);
}

// Index of the first capturing group that matched a non-empty string, or 0 (the whole
// match) if there is no such group.
function firstNonEmptyGroup(match) {
  return match.slice(1).findIndex(Boolean) + 1;
}

// Strip the following from a comment body:
//  - whitespace (both ends)
//  - a leading 'GW ' / 'LW ' or trailing ' GW' / ' LW' marker (completions often
//    include these when the model is imitating LessWrong or GreaterWrong)
//  - punctuation (from the start only)
// Returns the cleaned-up string.
function trimCommentBody(body) {
  const sitePrefixes = ['GW ', 'LW '];
  const siteSuffixes = [' GW', ' LW'];

  let cleaned = body.trim();
  // Drop site markers that aren't part of the comment itself
  if (sitePrefixes.some((prefix) => cleaned.startsWith(prefix))) {
    cleaned = cleaned.slice(3).trim();
  }
  if (siteSuffixes.some((suffix) => cleaned.endsWith(suffix))) {
    cleaned = cleaned.slice(0, -3).trim();
  }

  // Leading punctuation goes last, followed by one more whitespace pass
  const withoutLeadingPunctuation = cleaned.replace(/^\p{Punctuation}+/u, '');
  return withoutLeadingPunctuation.trim();
}

export {
  CommentRegexes,
  parseComments,
  parseCommentsBasic,
  trimCommentBody,
};
