# frozen_string_literal: true
#
#   irb/ruby-lex.rb - ruby lexical analyzer
#   	by Keiju ISHITSUKA(keiju@ruby-lang.org)
#

require "prism"
require "ripper"
require "jruby" if RUBY_ENGINE == "jruby"
require_relative "nesting_parser"

module IRB
  # :stopdoc:
  class RubyLex
    # Node types (first element of a Ripper s-expression) that count as an
    # assignment.
    #
    # :assign   - local, instance, global, class variable, constant, attribute
    #             and index assignment:
    #               "foo = bar", "@foo = bar", "$foo = bar", "@@foo = bar",
    #               "::Foo = bar", "a::Foo = bar", "Foo = bar",
    #               "foo.bar = 1", "foo[1] = bar"
    # :opassign - operation assignment:
    #               "foo += bar", "foo -= bar", "foo ||= bar", "foo &&= bar"
    # :massign  - multiple assignment:
    #               "foo, bar = 1, 2"
    ASSIGNMENT_NODE_TYPES = %i[assign opassign massign]

    # Token events treated as errors. When an error token and a non-error token
    # overlap at the same position, ripper_lex_without_warning keeps the
    # non-error one.
    ERROR_TOKENS = %i[
      on_parse_error compile_error on_assign_error
      on_alias_error on_class_name_error on_param_error
    ]

    # Token events that ltype_from_open_nestings looks for when searching the
    # open nestings for the innermost literal opener.
    LTYPE_TOKENS = [
      :on_heredoc_beg,
      :on_tstring_beg,
      :on_regexp_beg,
      :on_symbeg,
      :on_backtick,
      :on_symbols_beg,
      :on_qsymbols_beg,
      :on_words_beg,
      :on_qwords_beg,
    ]

    # Names that generate_local_variables_assign_code removes from the local
    # variable list before it builds the assignment code.
    RESERVED_WORDS = %i[
      __ENCODING__ __LINE__ __FILE__ BEGIN END
      alias and begin break case class def defined? do
      else elsif end ensure false for if in module
      next nil not or redo rescue retry return
      self super then true undef unless until when while yield
    ]

    # StandardError subclass that always carries the fixed message
    # "Terminate Line Input". It is not raised in the visible part of this file.
    class TerminateLineInput < StandardError
      def initialize
        super "Terminate Line Input"
      end
    end

    class << self
      # Yields +code+ and +line_no+ to the given block and returns the block's
      # result.
      #
      # If the block raises ArgumentError, the block is called once more with a
      # ";\n" line prepended to the code and with the line number decreased by
      # one. Ruby can raise that error when the code contains an incomplete
      # magic comment for encoding; the extra leading expression prevents magic
      # comment handling, and the lowered line number keeps the line numbers of
      # the lexed code unchanged. An error raised by the second call is not
      # rescued.
      def compile_with_errors_suppressed(code, line_no: 1)
        yield code, line_no
      rescue ArgumentError
        yield ";\n#{code}", line_no - 1
      end

      # Builds a chained assignment statement such as "a=b=nil;" that assigns
      # nil to every name in +local_variables+.
      #
      # Reserved words can appear as local variable names
      # (example: def f(if: 1); binding.irb; end), so every name listed in
      # RESERVED_WORDS is left out of the generated code.
      #
      # Returns nil when no name remains.
      def generate_local_variables_assign_code(local_variables)
        assignable = local_variables - RESERVED_WORDS
        return if assignable.empty?

        "#{assignable.join('=')}=nil;"
      end

      # Fills the gaps that Ripper leaves between tokens.
      #
      # Some parts of the code are not included in Ripper's tokens, for example
      # the DATA part, or the token after heredoc_beg when the heredoc has an
      # unclosed embexpr. Every stretch of +code+ not covered by +tokens+ is
      # wrapped in an :on_ignored_by_ripper element, so that
      # result.map(&:tok).join equals +code+.
      #
      # Token positions are [line, byte column] pairs with 1-based line numbers.
      # Returns a new array; +tokens+ is not modified.
      def interpolate_ripper_ignored_tokens(code, tokens)
        # Byte offset at which each line starts (index 0 is line 1).
        line_offsets = code.lines.each_with_object([0]) do |line, offsets|
          offsets << offsets.last + line.bytesize
        end

        result = []
        cursor = 0        # byte offset just past the last emitted token
        current_line = 1  # line number that +cursor+ is on

        # Emits the uncovered text that starts at +cursor+.
        emit_gap = lambda do |text|
          column = cursor - line_offsets[current_line - 1]
          result << Ripper::Lexer::Elem.new([current_line, column], :on_ignored_by_ripper, text, 0)
          current_line += text.count("\n")
        end

        tokens.each do |token|
          row, column = token.pos
          start = line_offsets[row - 1] + column
          emit_gap.call(code.byteslice(cursor...start)) if cursor < start
          result << token
          cursor = start + token.tok.bytesize
          current_line += token.tok.count("\n")
        end

        emit_gap.call(code.byteslice(cursor..)) if cursor < code.bytesize
        result
      end

      # Lexes +code+ with Ripper while $VERBOSE is set to nil, and returns the
      # tokens with the gaps filled in by interpolate_ripper_ignored_tokens.
      #
      # When +local_variables+ yields assignment code, that code is placed on a
      # line in front of +code+ and lexing starts at line number 0, so the
      # tokens of +code+ itself still start at line 1. Tokens on line 0 are
      # dropped.
      #
      # $VERBOSE is restored to its previous value on exit.
      def ripper_lex_without_warning(code, local_variables: [])
        saved_verbose, $VERBOSE = $VERBOSE, nil
        lvars_code = generate_local_variables_assign_code(local_variables)
        if lvars_code
          source = "#{lvars_code}\n#{code}"
          first_line_no = 0
        else
          source = code
          first_line_no = 1
        end

        compile_with_errors_suppressed(source, line_no: first_line_no) do |inner_code, inner_line_no|
          scanned = Ripper::Lexer.new(inner_code, '-', inner_line_no).scan
          kept = scanned.each_with_object([]) do |token, acc|
            # Line 0 holds the local variable assignment prelude.
            next if token.pos.first == 0

            previous = acc.last
            overlapped = previous &&
              token.pos[0] == previous.pos[0] &&
              token.pos[1] < previous.pos[1] + previous.tok.bytesize

            if !overlapped
              acc << token
            elsif ERROR_TOKENS.include?(previous.event) && !ERROR_TOKENS.include?(token.event)
              # Prefer the regular token over an error token at the same position.
              acc[-1] = token
            end
          end
          interpolate_ripper_ignored_tokens(code, kept)
        end
      ensure
        $VERBOSE = saved_verbose
      end
    end

    # Lexes and parses +code+ and reports its state.
    #
    # Returns a three-element array:
    #   [tokens from ripper_lex_without_warning,
    #    open nestings from NestingParser.open_nestings,
    #    result of code_terminated? for those tokens and nestings]
    def check_code_state(code, local_variables:)
      tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables)
      parse_lex_result = Prism.parse_lex(code, scopes: [local_variables])
      opens = NestingParser.open_nestings(parse_lex_result)
      terminated = code_terminated?(code, tokens, opens, local_variables: local_variables)
      [tokens, opens, terminated]
    end

    # Decides whether +code+ is complete, based on the result of
    # check_code_syntax:
    #
    #   :unrecoverable_error - true
    #   :recoverable_error   - false
    #   :other_error         - true only when +opens+ is empty and the tokens do
    #                          not ask for a continuation line
    #   :valid               - true unless the tokens ask for a continuation line
    #
    # Returns nil for any other syntax check result.
    def code_terminated?(code, tokens, opens, local_variables:)
      syntax_state = check_code_syntax(code, local_variables: local_variables)
      return true if syntax_state == :unrecoverable_error
      return false if syntax_state == :recoverable_error
      return unless syntax_state == :other_error || syntax_state == :valid
      return false if syntax_state == :other_error && !opens.empty?

      !should_continue?(tokens)
    end

    # Returns true when the last of the (possibly multiple) expressions in
    # +code+ is an assignment, false otherwise.
    #
    # The code is parsed with Ripper.sexp while $VERBOSE is nil. For invalid
    # code Ripper.sexp returns nil, which results in false. For valid code the
    # second element of the top level array is the list of parsed expressions,
    # and the first element of each expression is its node type.
    def assignment_expression?(code, local_variables:)
      saved_verbose, $VERBOSE = $VERBOSE, nil
      # A prelude statement is always present, so dropping the first parsed
      # expression below removes exactly the prelude.
      prelude = RubyLex.generate_local_variables_assign_code(local_variables) || 'nil;'
      sexp = Ripper.sexp("#{prelude}\n#{code}")
      expressions = sexp&.dig(1)&.drop(1)
      last_node_type = expressions&.dig(-1, 0)
      ASSIGNMENT_NODE_TYPES.include?(last_node_type)
    ensure
      $VERBOSE = saved_verbose
    end

    # Looks at the end of +tokens+ and tells whether IRB needs to keep reading
    # the next line.
    #
    # Example code that should continue: `a\` `a +` `a.`
    # Trailing spaces, newlines, comments and embdoc tokens are skipped when
    # looking for the last meaningful token.
    def should_continue?(tokens)
      trailing = tokens.last
      # Line continuation by a backslash at the very end
      return true if trailing&.event == :on_sp && trailing.tok == "\\\n"

      skippable = %i[on_sp on_nl on_ignored_nl on_comment on_embdoc_beg on_embdoc on_embdoc_end]
      last_significant = tokens.reverse_each.find { |token| !skippable.include?(token.event) }
      return false unless last_significant

      # State is EXPR_BEG after these but the code should not continue
      return false if %i[on_regexp_end on_heredoc_end on_semicolon].include?(last_significant.event)

      # Endless range should not continue
      return false if last_significant.event == :on_op && last_significant.tok.match?(/\A\.\.\.?\z/)

      # EXPR_DOT and most of the EXPR_BEG should continue
      last_significant.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
    end

    # Compiles +code+ (prefixed with the local variable assignment code) and
    # classifies the outcome.
    #
    # Warnings are silenced by setting $VERBOSE to nil while compiling; the
    # previous value is restored on exit.
    #
    # Returns one of:
    #   :valid               - the code compiled without raising
    #   :recoverable_error   - SyntaxError whose message reports an unterminated
    #                          string/regexp or an unexpected end-of-input
    #   :unrecoverable_error - EncodingError, or a SyntaxError whose message
    #                          reports an unexpected keyword_end, '.' or
    #                          tREGEXP_BEG
    #   :other_error         - any other SyntaxError
    #
    # Exceptions other than EncodingError and SyntaxError are not rescued.
    def check_code_syntax(code, local_variables:)
      lvars_code = RubyLex.generate_local_variables_assign_code(local_variables)
      code = "#{lvars_code}\n#{code}"

      begin # check if parser error are available
        verbose, $VERBOSE = $VERBOSE, nil
        case RUBY_ENGINE
        when 'ruby'
          self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
            RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
          end
        when 'jruby'
          JRuby.compile_ir(code)
        else
          # The BEGIN block throws before any of the code runs, so the code is
          # only parsed, never executed.
          catch(:valid) do
            eval("BEGIN { throw :valid, true }\n#{code}")
            false
          end
        end
      rescue EncodingError
        # This is for a hash with invalid encoding symbol, {"\xAE": 1}
        #
        # An explicit return is required: the value of this begin/rescue
        # expression is discarded, so without it the method would fall through
        # and report :valid.
        return :unrecoverable_error
      rescue SyntaxError => e
        case e.message
        when /unexpected keyword_end/
          # "syntax error, unexpected keyword_end"
          #
          #   example:
          #     if (
          #     end
          #
          #   example:
          #     end
          return :unrecoverable_error
        when /unexpected '\.'/
          # "syntax error, unexpected '.'"
          #
          #   example:
          #     .
          return :unrecoverable_error
        when /unexpected tREGEXP_BEG/
          # "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
          #
          #   example:
          #     method / f /
          return :unrecoverable_error
        when /unterminated (?:string|regexp) meets end of file/
          # "unterminated regexp meets end of file"
          #
          #   example:
          #     /
          #
          # "unterminated string meets end of file"
          #
          #   example:
          #     '
          return :recoverable_error
        when /unexpected end-of-input/
          # "syntax error, unexpected end-of-input, expecting keyword_end"
          #
          #   example:
          #     if true
          #       hoge
          #       if false
          #         fuga
          #       end
          return :recoverable_error
        else
          return :other_error
        end
      ensure
        $VERBOSE = verbose
      end
      :valid
    end

    # Computes the indent level implied by the open nesting elements +opens+
    # (objects responding to #event and #tok).
    #
    # Rules, applied from the outermost element inwards:
    #   heredoc   - ignored when the following element is also a heredoc;
    #               otherwise `<<~` / `<<-` adds one level and any other heredoc
    #               resets the level to 0
    #   "", //, :"", ``        - no indent
    #   %(), %r(), %i(), %x()  - one level (token starts with '%')
    #   embdoc (=begin)        - resets the level to 0
    #   anything else          - one level, except for `alias` and `undef`
    def calc_indent_level(opens)
      opens.each_with_index.inject(0) do |level, (elem, index)|
        case elem.event
        when :on_embdoc_beg
          0
        when :on_heredoc_beg
          if opens[index + 1]&.event == :on_heredoc_beg
            level
          elsif elem.tok.match?(/^<<[~-]/)
            level + 1
          else
            0
          end
        when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
          elem.tok.start_with?('%') ? level + 1 : level
        else
          %w[alias undef].include?(elem.tok) ? level : level + 1
        end
      end
    end

    # Events of open nesting elements that free_indent_nesting_element?
    # recognizes.
    FREE_INDENT_NESTINGS = [:on_tstring_beg, :on_backtick, :on_regexp_beg, :on_symbeg]

    # Returns true when the event of +elem+ is listed in FREE_INDENT_NESTINGS.
    # A nil +elem+ yields false.
    def free_indent_nesting_element?(elem)
      event = elem&.event
      FREE_INDENT_NESTINGS.include?(event)
    end

    # Calculates the difference between the indent of pasted code and the
    # indent calculated from tokens, for the line at +line_index+.
    #
    # Each entry of +line_results+ is destructured as
    # [prev_opens, next_opens, min_depth]. When the innermost open element of
    # the line is a heredoc or a free-indent token, the leading whitespace is
    # part of that multiline token rather than an indent, so the calculation
    # moves to the line where the token begins. A heredoc whose opener is not
    # `<<-` or `<<~` yields 0.
    def indent_difference(lines, line_results, line_index)
      loop do
        prev_opens, _, min_depth = line_results[line_index]
        innermost = prev_opens.last
        in_heredoc = innermost&.event == :on_heredoc_beg
        inside_multiline_token = innermost && (in_heredoc || free_indent_nesting_element?(innermost))

        unless inside_multiline_token
          # The leading whitespace is an indent: return the difference
          expected_indent = 2 * calc_indent_level(prev_opens.take(min_depth))
          return lines[line_index][/^ */].size - expected_indent
        end
        return 0 if in_heredoc && innermost.tok.match?(/^<<[^-~]/)

        # Continue from the line on which the multiline token begins
        line_index = innermost.pos[0] - 1
      end
    end

    # Computes the indent (number of leading spaces) for the line at
    # +line_index+.
    #
    # parse_lex_result - passed to NestingParser.parse_by_line; each per-line
    #                    result is destructured as
    #                    [prev_opens, next_opens, min_depth]
    #                    (presumably the nestings open before and after the
    #                    line - confirm against NestingParser)
    # lines            - source lines, indexed by line_index
    # line_index       - zero-based index of the line being indented
    # is_newline       - when true, the indent to preserve is read from the
    #                    previous line (line_index - 1) instead of line_index
    #
    # Returns an Integer.
    def process_indent_level(parse_lex_result, lines, line_index, is_newline)
      line_results = NestingParser.parse_by_line(parse_lex_result)
      result = line_results[line_index]
      if result
        prev_opens, next_opens, min_depth = result
      else
        # When last line is empty
        prev_opens = next_opens = line_results.last[1]
        min_depth = next_opens.size
      end

      # To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
      # Shortest open tokens can be calculated by `opens.take(min_depth)`
      indent = 2 * calc_indent_level(prev_opens.take(min_depth))

      # Number of leading spaces already present on the reference line
      preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size

      prev_open_elem = prev_opens.last
      next_open_elem = next_opens.last

      # Calculates base indent for pasted code on the line where prev_open_elem is located
      # irb(main):001:1*   if a # base_indent is 2, indent calculated from nestings is 0
      # irb(main):002:1*         if b # base_indent is 6, indent calculated from nestings is 2
      # irb(main):003:0>           c # base_indent is 6, indent calculated from nestings is 4
      if prev_open_elem
        # A negative difference is clamped to 0
        base_indent = [0, indent_difference(lines, line_results, prev_open_elem.pos[0] - 1)].max
      else
        base_indent = 0
      end

      if free_indent_nesting_element?(prev_open_elem)
        if is_newline && prev_open_elem.pos[0] == line_index
          # First newline inside free-indent token
          base_indent + indent
        else
          # Accept any number of indent inside free-indent token
          preserve_indent
        end
      elsif prev_open_elem&.event == :on_embdoc_beg || next_open_elem&.event == :on_embdoc_beg
        if prev_open_elem&.event == next_open_elem&.event
          # Accept any number of indent inside embdoc content
          preserve_indent
        else
          # =begin or =end
          0
        end
      elsif prev_open_elem&.event == :on_heredoc_beg
        tok = prev_open_elem.tok
        # The heredoc is still open after this line unless next_opens is shorter than prev_opens
        if prev_opens.size <= next_opens.size
          if is_newline && lines[line_index].empty? && line_results[line_index - 1][0].last != next_open_elem
            # First line in heredoc
            tok.match?(/^<<[-~]/) ? base_indent + indent : indent
          elsif tok.match?(/^<<~/)
            # Accept extra indent spaces inside `<<~` heredoc
            [base_indent + indent, preserve_indent].max
          else
            # Accept any number of indent inside other heredoc
            preserve_indent
          end
        else
          # Heredoc close
          prev_line_indent_level = calc_indent_level(prev_opens)
          # `<<~` / `<<-` terminators may be indented; other terminators go to column 0
          tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
        end
      else
        base_indent + indent
      end
    end

    # Returns a one-character string describing the innermost open literal in
    # +opens+, or nil when no element with an LTYPE_TOKENS event is open.
    #
    #   string               - '"' for ", %x and %Qx openers; "'" for ' and %qx
    #                          openers; nil for any other opener
    #   regexp               - '/'
    #   symbol               - ':'
    #   backtick             - '`'
    #   word / symbol arrays - ']'
    #   heredoc              - the quote character around the identifier, or '"'
    #                          when the opener does not match the quoted form
    def ltype_from_open_nestings(opens)
      opener = opens.reverse_each.find { |elem| LTYPE_TOKENS.include?(elem.event) }
      return unless opener

      case opener.event
      when :on_tstring_beg
        case opener.tok
        when '"', /^%.$/, /^%Q.$/ then '"'
        when "'", /^%q.$/ then "'"
        end
      when :on_regexp_beg
        '/'
      when :on_symbeg
        ':'
      when :on_backtick
        '`'
      when :on_qwords_beg, :on_words_beg, :on_qsymbols_beg, :on_symbols_beg
        ']'
      when :on_heredoc_beg
        quoted = /<<[-~]?(['"`])\w+\1/.match(opener.tok)
        quoted ? quoted[1] : '"'
      end
    end

    # Checks whether +code+ without its last line is already terminated.
    #
    # The code is split after the second-to-last token whose text contains a
    # newline; the tokens after it form the last line. The text of the last
    # line is returned when
    #   * the last line contains a token other than spaces and comments,
    #   * the state of the first such token is not EXPR_DOT, and
    #   * code_terminated? reports the code before the last line as terminated.
    # Otherwise false is returned.
    def check_termination_in_prev_line(code, local_variables:)
      tokens = self.class.ripper_lex_without_warning(code, local_variables: local_variables)

      # Index of the last token before the last line
      newline_indexes = tokens.each_index.select { |i| tokens[i].tok.include?("\n") }
      split_at = newline_indexes[-2]
      return false unless split_at

      last_line_tokens = tokens[(split_at + 1)..]
      first_token = last_line_tokens.find do |t|
        !%i[on_sp on_ignored_sp on_comment].include?(t.event)
      end
      return false unless first_token && first_token.state != Ripper::EXPR_DOT

      tokens_without_last_line = tokens[0..split_at]
      code_without_last_line = tokens_without_last_line.map(&:tok).join
      parse_lex_result = Prism.parse_lex(code_without_last_line, scopes: [local_variables])
      opens_without_last_line = NestingParser.open_nestings(parse_lex_result)
      terminated = code_terminated?(
        code_without_last_line,
        tokens_without_last_line,
        opens_without_last_line,
        local_variables: local_variables
      )
      terminated ? last_line_tokens.map(&:tok).join : false
    end
  end
  # :startdoc:
end

# Top-level alias for IRB::RubyLex. The alias is marked as deprecated, so
# referring to ::RubyLex produces a deprecation warning (when such warnings
# are enabled).
RubyLex = IRB::RubyLex
Object.deprecate_constant(:RubyLex)