/**
 * Splits `input` into word-like and punctuation tokens, in order of appearance.
 *
 * - A `'text'` token is a maximal run of Unicode letters, numbers, combining
 *   marks, apostrophes (`'`) and hyphens (`-`), e.g. `don't` or `well-known`.
 *   A run consisting only of apostrophes/hyphens (such as a lone `-`) is
 *   therefore also reported as `'text'`.
 * - A `'punct'` token is exactly one of `. , ! ? ; :` or a single space.
 *
 * Characters that match neither class (tabs, newlines, `@`, ...) are skipped
 * silently, so concatenating the token values does not always reproduce `input`.
 *
 * @param input - The string to tokenize.
 * @returns The tokens found, left to right; an empty array for an empty input.
 */
export function tokenize(
  input: string,
): Array<{ value: string; type: 'text' | 'punct' }> {
  // Capture group 1 participates only when the word-like alternative matched,
  // so it doubles as the classifier and no second regex test is needed.
  const pattern = /([\p{L}\p{N}\p{M}'-]+)|[.,!?;: ]/gu;
  const tokens: Array<{ value: string; type: 'text' | 'punct' }> = [];

  for (const match of input.matchAll(pattern)) {
    tokens.push({
      value: match[0],
      type: match[1] !== undefined ? 'text' : 'punct',
    });
  }

  return tokens;
}
