import he from 'he';

/**
 * Render keyword entries as display text.
 *
 * Each entry becomes `word(frequency)`; when the entry has inside keywords
 * they follow on the next line, joined with ", ". Entries are separated
 * by ",\n".
 *
 * @param {Array<{word: string, frequency: number, insideKeywords: Array<{keyword: string}>}>} data
 * @returns {string} The formatted text.
 */
export const formatKeywordData = (data) => {
  // Builds the text for one entry: the header line plus optional nested line.
  const renderEntry = ({ word, frequency, insideKeywords }) => {
    const header = `${word}(${frequency})`;
    const nested = insideKeywords.map(({ keyword }) => keyword).join(', ');

    if (!nested) {
      return header;
    }
    return `${header},\n${nested}`;
  };

  const rendered = data.map((entry) => renderEntry(entry));
  return rendered.join(',\n');
};

/**
 * Normalise a newline-separated list of website URLs.
 *
 * The input is split on line breaks (an optional comma and any whitespace
 * around the break are consumed), each entry is trimmed, a leading
 * "new page-" prefix is removed, and the entries are re-joined with ",\n".
 * Empty entries are kept, so the number of lines is preserved.
 *
 * @param {string} data - Raw URL list; falsy input yields ''.
 * @returns {string} The cleaned list.
 */
export const formatwebsiteUrls = (data) => {
  if (!data) return '';

  return data
    .split(/\s*,?\s*\n\s*/)
    .map((entry) =>
      // Trim BEFORE stripping the prefix: the split only eats whitespace
      // around line breaks, so the first entry may still start with spaces.
      entry
        .trim()
        .replace(/^new page-/, '')
        .trim()
    )
    .join(',\n');
};

/**
 * Extract the inner content of every `<a>…</a>` element in an HTML string
 * (used to identify anchor text when analysing internal links).
 *
 * `he.decode` is applied to each match so that HTML entities for special
 * characters (è, é, …) come back as the characters themselves. Markup nested
 * inside an anchor is returned as-is; it is not stripped.
 *
 * @param {string} html - HTML source to scan.
 * @returns {string[]} Decoded anchor contents in document order; an empty
 *   array when `html` is not a string.
 */
export const extractAnchorTexts = (html) => {
  if (typeof html !== 'string') return [];

  // `<a` must be followed by whitespace or `>` so that tags which merely
  // start with "a" (<article>, <abbr>, <aside>, …) are not treated as
  // anchors. `[\s\S]` lets the anchor content span several lines.
  const anchorRegex = /<a(?:\s[^>]*)?>([\s\S]*?)<\/a>/gi;

  return [...html.matchAll(anchorRegex)].map((match) => he.decode(match[1]));
};

/**
 * Build the "word(frequency)" listing for the main keywords only.
 *
 * @param {Array<{word: string, frequency: number}>} semanticGap - Keyword
 *   entries (e.g. as loaded from the db).
 * @returns {string} One `word(frequency)` item per entry, joined with ",\n".
 */
export const extractKeywordsWithFrequency = (semanticGap) =>
  semanticGap
    .map(({ word, frequency }) => `${word}(${frequency})`)
    .join(',\n');

// Extract keywords with frequency from the combined keyword string => output is a string
// export const extractStrKeywordsWithFrequency = (input) => {
//   // Regular expression to match keywords with frequency e.g., "keyword(1)"
//   const regex = /\b\p{L}+\(\d+\)/gu;

//   // Extract all matches using the regex
//   const matches = input.match(regex);
//   console.log('matches', matches);

//   // Join the matches back into a string separated by commas if matches exist
//   return matches ? matches.join(', ') : '';
// };

/**
 * Keep only the `keyword(frequency)` items of a combined keyword string.
 *
 * The input is split on newlines and commas; an item is kept when it is
 * non-empty text followed by a final `(digits)` group. The keyword itself may
 * contain spaces or parentheses, e.g. "foo (bar)(3)".
 *
 * @param {string} input - Combined keyword text; falsy input yields ''.
 * @returns {string} The matching items joined with ", ".
 */
export const extractStrKeywordsWithFrequency = (input) => {
  if (!input) return '';

  return input
    .split(/[\n,]+/)
    .map((part) => part.trim())
    .filter((part) => {
      // The frequency group must close the item...
      if (!part.endsWith(')')) return false;

      // ...and is opened by the LAST "(", so parentheses inside the keyword
      // text do not interfere. Index 0 would mean there is no keyword.
      const openParen = part.lastIndexOf('(');
      if (openParen <= 0) return false;

      const keyword = part.slice(0, openParen).trim();
      const frequency = part.slice(openParen + 1, -1);

      return keyword.length > 0 && /^\d+$/.test(frequency);
    })
    .join(', ');
};

/**
 * Convert an API response into the internal keyword-entry shape.
 *
 * Each `result` item `{ freq, insideKeyword, keyword }` is mapped to
 * `{ frequency, insideKeywords, word }`, keeping only the `keyword` field of
 * every inside keyword. A missing response or missing `result` logs a warning
 * and yields an empty array.
 *
 * @param {{result?: Array<{freq: number, keyword: string, insideKeyword: Array<{keyword: string}>}>}} apiResponse
 * @returns {Array<{frequency: number, insideKeywords: Array<{keyword: string}>, word: string}>}
 */
export const transformData = (apiResponse) => {
  const hasResult = Boolean(apiResponse) && Boolean(apiResponse.result);

  if (hasResult) {
    return apiResponse.result.map(({ freq, insideKeyword, keyword }) => ({
      frequency: freq,
      insideKeywords: insideKeyword.map(({ keyword: nested }) => ({
        keyword: nested,
      })),
      word: keyword,
    }));
  }

  console.warn('API response is undefined or missing result:', apiResponse);
  return [];
};

/**
 * Parse a keywords string into the semanticGap structure.
 *
 * The text is read line by line. A line of the form `word(frequency)` starts
 * a new entry. Following lines that contain no "(" supply that entry's inside
 * keywords; such a line may hold several keywords separated by commas, each
 * of which becomes its own `{ keyword }` item. Blank lines and trailing
 * commas are ignored. A line that contains "(" but is not a valid
 * `word(frequency)` is dropped and ends the current entry's inside keywords.
 *
 * @param {string} keywords - Text such as "code(2),\nk1, k2,\nother(1)";
 *   falsy input yields [].
 * @returns {Array<{word: string, frequency: number, insideKeywords: Array<{keyword: string}>}>}
 */
export const transformToSemanticGap = (keywords) => {
  if (!keywords) return [];

  const semanticGap = [];
  // Entry currently collecting inside keywords; null when there is none.
  let current = null;

  for (const rawLine of keywords.split('\n')) {
    // trim() also removes a stray "\r"; the separator comma at the end of a
    // line is not part of the content.
    const line = rawLine.trim().replace(/,+$/, '').trim();
    if (line === '') continue;

    // Main keyword with frequency, e.g. "code(2)".
    const mainKeywordMatch = line.match(/^(.+?)\((\d+)\)$/);
    if (mainKeywordMatch) {
      current = {
        word: mainKeywordMatch[1].trim(),
        frequency: Number.parseInt(mainKeywordMatch[2], 10),
        insideKeywords: [],
      };
      semanticGap.push(current);
      continue;
    }

    // Has a parenthesis but is not a valid main keyword: skip it and stop
    // attaching inside keywords until the next main keyword.
    if (line.includes('(')) {
      current = null;
      continue;
    }

    // Inside keywords that appear before any main keyword have no owner.
    if (current === null) continue;

    for (const piece of line.split(',')) {
      const keyword = piece.trim();
      if (keyword !== '') {
        current.insideKeywords.push({ keyword });
      }
    }
  }

  return semanticGap;
};
