Autogram

Autograms are sentences that describe themselves by providing an inventory of their own characters. For this reason, they are also called self-enumerating or self-documenting sentences.

this sentence contains two a’s, one b, three c’s, one d, thirty-four e’s, five f’s, three g’s, nine h’s, eleven i’s, one j, one k, two l’s, one m, twenty-two n’s, sixteen o’s, one p, one q, eight r’s, twenty-nine s’s, twenty-three t’s, three u’s, four v’s, eight w’s, two x’s, five y’s, one z.

I have written a small Python script (100 lines) using PyTorch to generate the above sentence.

I am curious how you would solve this problem — what would your solution be?

from num2words import num2words
import torch
import time
import os
import cProfile, pstats, io
from pstats import SortKey

# Uncomment to force CPU-only execution (hides all CUDA devices from torch).
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

NUM_NUMBERS = 50      # count words 0..49; candidate counts are drawn from 1..NUM_NUMBERS-1
NUM_LETTERS = 26      # size of the English alphabet
NUM_SEN = 5           # number of candidate sentences evolved in parallel
NUM_ITERS = 1000000   # maximum hill-climbing iterations
PRINT_ITER = 10000    # progress-report interval, in iterations

# Environment diagnostics. The device-specific queries are guarded:
# torch.cuda.current_device()/get_device_name() raise on a CPU-only
# install, which crashed the original unconditional prints.
print(f"torch.__version__: {torch.__version__}")
print(f"torch.cuda.is_available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
	print(f"torch.cuda.device_count: {torch.cuda.device_count()}")
	print(f"torch.cuda.current_device: {torch.cuda.current_device()}")
	print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")

A = ord("a")  # code point of 'a'; letters map to tensor indices 0..25

# NOTE(review): the purpose of the leading filler spaces is unclear — only
# a-z are counted by to_numbers(), so they are inert for scoring; presumably
# they stand in for non-letter characters of the final sentence. Confirm.
abc = "         "
abc2 = abc

# numbers[row] will hold the letter counts of the English word for `row`.
numbers = torch.zeros([NUM_NUMBERS, NUM_LETTERS], dtype=torch.int32)

# Per-sentence fitness: how many of the 26 letter counts are self-consistent.
# zeros() instead of empty(): if a solution is found before every slot has
# been written, uninitialized garbage in the unwritten slots could win the
# argmax() in the reporting loop below.
goods1 = torch.zeros(NUM_SEN)

this_sentence = "this sentence contains "

# Build the letter skeleton of the sentence: "ab" followed by each letter
# c..z with a plural "s" appended, plus a comma-separated display variant.
# NOTE(review): 'a' and 'b' get no plural 's' while c..z always do — this
# looks like a fixed pluralization assumption baked into the search; confirm.
for c in range(NUM_LETTERS):
	abc += chr(c + A)
	if c >= 2:		# plural add 's
		abc += "s"
	abc2 += chr(c + A)
	abc2 += ",  "


def to_numbers(word, num_letters=None, base=None):
	"""Count letter occurrences in *word*.

	Args:
		word: iterable of single characters (a plain string works).
		num_letters: alphabet size; defaults to the module-level NUM_LETTERS.
		base: code point of the first letter; defaults to the module-level A.

	Returns:
		int32 tensor of length num_letters; entry i is the number of
		characters whose code point equals base + i. Characters outside
		[base, base + num_letters) — spaces, commas, digits — are ignored.
	"""
	if num_letters is None:
		num_letters = NUM_LETTERS
	if base is None:
		base = A
	number = torch.zeros([num_letters], dtype=torch.int32)
	for letter in word:
		idx = ord(letter) - base
		if 0 <= idx < num_letters:
			number[idx] += 1
	return number


# Generate numbers, 2D array.
# Row `row` holds the letter counts of the English word for the integer
# `row` (e.g. row 3 counts the letters of "three").
for row in range(NUM_NUMBERS):
	numbers[row] = to_numbers(list(num2words(row)))
# Letter counts of the fixed parts of every candidate sentence: the prefix
# and the letter skeleton built above.
numbers_this_sentence = to_numbers(list(this_sentence))
numbers_abc = to_numbers(list(abc))

# Generate candidate sentences: one random count per letter per sentence,
# at least 1 and at most NUM_NUMBERS - 1 (randint's upper bound is exclusive).
size = (NUM_SEN, NUM_LETTERS)
sentences = torch.randint(1, NUM_NUMBERS, size)

# Scratch buffers reused every iteration by evaluate_all_sentences():
# rows 0..NUM_LETTERS-1 receive the per-letter number-word counts; the last
# two rows hold the constant prefix/skeleton counts. empty() is safe here
# because the variable rows are fully overwritten before each use.
work1 = torch.empty(NUM_LETTERS + 2, NUM_LETTERS, dtype=torch.int32)
work1[NUM_LETTERS] = numbers_this_sentence
work1[NUM_LETTERS + 1] = numbers_abc
sum1 = torch.empty(NUM_LETTERS, dtype=torch.int32)


def evaluate_all_sentences():
	"""Run one hill-climbing step on every candidate sentence.

	For each candidate, compute the actual letter counts implied by its
	stated counts, score how many letters agree, and nudge one wrong
	count by +/-1 toward agreement. Mutates the module-level `sentences`,
	`goods1`, `work1`, and `sum1` in place.

	Returns True as soon as any candidate is fully self-consistent
	(a solution); False otherwise. NOTE(review): on an early True return,
	candidates after the winning one are neither scored nor mutated this
	iteration.
	"""
	for sen in range(NUM_SEN):
		# Rows 0..25: letter counts of the number-word chosen for each letter.
		work1[:NUM_LETTERS] = torch.index_select(numbers, 0, sentences[sen])
		# Column sums = actual letter counts of the whole sentence
		# (number words + constant prefix/skeleton rows 26 and 27).
		sum1[:] = torch.sum(work1, dim=0)
		# Indices where the stated count disagrees with the actual count.
		wrongs = torch.nonzero(sentences[sen] != sum1)
		goods1[sen] = NUM_LETTERS - wrongs.size(dim=0)
		if goods1[sen] == NUM_LETTERS:
			return True  # Solution found
		else:
			# Pick one disagreeing letter at random and move its stated
			# count one step toward the actual count.
			change_letter_id = torch.randint(wrongs.size(dim=0), (1,), dtype=torch.int32)
			change_letter_id = wrongs[change_letter_id]
			number = sentences[sen][change_letter_id]
			if sum1[change_letter_id] > number:
				number += 1
			elif sum1[change_letter_id] < number:
				number -= 1
			# Keep the count inside the valid range of the numbers table.
			sentences[sen][change_letter_id] = number.clamp(1, NUM_NUMBERS - 1)

	return False  # Solution not yet found


# Profile the search loop so the hottest torch calls show up in the report.
profile = cProfile.Profile()
profile.enable()

tik = time.perf_counter()

for i in range(NUM_ITERS):
	found = evaluate_all_sentences()
	if i % PRINT_ITER == 0 or found:
		# Periodic progress report (i == 0 always reports, so `best` is
		# guaranteed to be bound before the rendering loop below).
		tok = time.perf_counter()
		dtime = tok - tik
		tik = tok
		best = goods1.argmax()
		print(abc2)
		print(sentences[best])
		print("%d: goods %f (%d) (%.3f)" % (i, torch.mean(goods1), goods1[best], dtime))
		if goods1[best] == NUM_LETTERS:
			break

# Render the best candidate as an English sentence.
# NOTE(review): if the iteration budget ran out without a solution, this
# still prints the closest attempt, which is not a verified autogram.
for c in range(NUM_LETTERS):
	count = sentences[best][c]
	this_letter = "%s %s" % (num2words(int(count)), chr(c + A))
	this_sentence += this_letter
	# Pluralize with "'s" exactly when the count exceeds one.
	this_sentence += "'s, " if count > 1 else ", "
print(this_sentence[:-2] + ".")  # swap the trailing ", " for a period

profile.disable()
# Print only the first 20 lines of the cumulative-time profiler report.
stats_output = io.StringIO()
stats = pstats.Stats(profile, stream=stats_output).sort_stats(SortKey.CUMULATIVE)
stats.print_stats()
print("".join(stats_output.getvalue().splitlines(keepends=True)[:20]))