#! /usr/bin/env nix-shell
#! nix-shell -i python3 -p 'python3.withPackages (ps: with ps; [ transformers argparse ])'
"""Print the number of tokens in the text read from standard input.

The text is tokenized with the tokenizer that ``transformers.AutoTokenizer``
resolves for the model given by ``-m/--model`` (default ``gpt2``), and the
token count is written to standard output as a single integer.
"""

import argparse
import sys


def _build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Tokenize input text using specified model tokenizer."
    )
    parser.add_argument(
        '-m', '--model',
        type=str,
        default='gpt2',
        help='Model name for tokenizer, e.g., "gpt2", "distilgpt2". Default is "gpt2".'
    )
    return parser


def _count_tokens(text, model_name):
    """Return how many token ids the tokenizer for *model_name* yields for *text*."""
    # Imported here rather than at module level so that ``--help`` and
    # argument errors are handled without loading ``transformers`` first.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # encode() is called with its defaults, so whatever the tokenizer adds
    # by default (e.g. special tokens, for models that define them) is
    # part of the count.
    return len(tokenizer.encode(text))


def main(argv=None):
    """Read text from stdin, tokenize it, and print the token count.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse falls back to ``sys.argv[1:]``,
            which is the behaviour of a plain ``main()`` call.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    # Parse first so that bad arguments fail before stdin is consumed.
    args = _build_parser().parse_args(argv)

    # Leading and trailing whitespace is not counted.
    text = sys.stdin.read().strip()

    print(_count_tokens(text, args.model))


if __name__ == "__main__":
    main()