Immortality LLM
Aim: The aim of Immortality LLM is to build an LLM that can make human beings immortal. That means understanding physiology and carrying out the lab work needed to that end. For this, the LLM must be super-intelligent.
Plan: The model comprises every category in which A.I. has surpassed human ability. The model is updated whenever a new category exceeds human capability; otherwise the category is not added and instead goes back to human-led development of the LLM. This is an attempt to widen narrow A.I. into something more general; it also approaches S.I. as its abilities exceed human ones.
Any method that improves LLMs is wanted, but methods that improve them the most and are repeatable are wanted even more.
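As a rough illustration of this gate, a minimal sketch follows; the category names, scores, and human baselines are made-up placeholders for illustration, not measurements or an existing system.
# Minimal sketch of the category-gating rule: a capability is folded into the model only
# once it exceeds the human baseline; otherwise it stays with human-led development.
# All names and numbers here are illustrative assumptions.
def gate_category(model_categories, category, model_score, human_baseline):
    if model_score > human_baseline:
        model_categories.add(category)
        return "added: retrain the LLM to include this category"
    return "not added: continue human-led development of the LLM"

model_categories = {"chess", "go"}
print(gate_category(model_categories, "texas_holdem", model_score=0.72, human_baseline=0.65))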
Milestone 1 Capacities:
- Chess Master:
- Go Master:
- Texas Hold'em Poker Master:
- Jeopardy Master (not part of milestone 1 as it requires too much data at this stage)
- Add more games
Category 1: Master of Games
Our System
- Model is asked about chess and generates a Python program to train itself in a simulated environment.
- Appends to and alters its training data with the new information (training generates new training data).
- Hits the re-train button on itself.
- It has learned. (automate the process, give it resources)
It keeps doing this to improve its ability at, for example, chess.
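A minimal sketch of this loop follows, assuming a hypothetical workbench object that can execute generated experiments, store training data, and trigger retraining; none of these function names refer to an existing API.
# Hypothetical self-training loop: generate an experiment, run it in a simulated
# environment, fold the results back into the training data, and retrain.
def self_training_cycle(model, workbench, topic='chess', retrain_threshold=10000):
    new_examples = []
    while True:  # continuous self-improvement, as described above
        experiment_code = model.generate_experiment(topic)  # model writes its own training program
        results = workbench.run(experiment_code)             # workbench executes it in simulation
        new_examples.extend(results)                         # training generates new training data
        if len(new_examples) >= retrain_threshold:
            workbench.append_training_data(new_examples)     # append to / alter the training data
            model = workbench.retrain(model)                 # hit the re-train button
            new_examples = []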
Ability of model:
- generate the pertinent experiment for self-training when prompted.
- output usable, effective training data
- it generates Python code that runs the simulation; rather than building the simulation from scratch each time, it is provided with functions such as init_chess() from a simulated environment. It interfaces with a workbench that sets up testing facilities with single function calls and ensures the output data is effective training data (a sketch of such an interface follows this list).
- go through its existing training data and improve it with the new training data produced.
- hit its own re-train button when changes reach a threshold.
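A hedged sketch of the workbench interface these abilities assume; init_chess() is the only name taken from the text above, the rest are illustrative placeholders, and the simulated environment assumes the python-chess package.
# Sketch of a workbench that sets up a testing facility with a single call and filters
# generated data so only usable examples are appended. Everything except init_chess()
# is an illustrative assumption.
import chess  # python-chess, assuming it is installed

class Workbench:
    def init_chess(self):
        # One call returns a ready-to-use simulated chess environment.
        return chess.Board()

    def validate_training_data(self, examples):
        # Keep only examples that are actually usable as training data.
        return [ex for ex in examples if ex.get('text') and ex.get('label') is not None]

    def should_retrain(self, num_new_examples, threshold=10000):
        # Only press the re-train button once changes reach a threshold.
        return num_new_examples >= threshold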
The workbench can cover everything, not just A.I.; for instance, the problem might be an error in some code, or a request to install a particular Linux distribution and version, and more. We understand the limitations of this system, but it is what we have at the moment. As we tackle problems that do not lend themselves to computer testing, we develop them to become more computer-solvable, e.g. computerized physiology and medicine and computerized lab work.
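One speculative way the workbench could accept such non-A.I. tasks; the task layout and handlers below are assumptions, not an existing interface.
# Speculative dispatch of non-A.I. workbench tasks; the task dictionary schema is an assumption.
import subprocess

def handle_task(task):
    if task['kind'] == 'fix_code_error':
        # Re-run the failing script and hand the traceback back as context for the model.
        result = subprocess.run(['python', task['path']], capture_output=True, text=True)
        return result.stderr
    if task['kind'] == 'install_linux_distro':
        # An actual install belongs in a VM or container; here we only record the request.
        return f"queued install of {task['distro']} {task['version']}"
    raise ValueError(f"unknown task kind: {task['kind']}")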
Building an LLM
# pip install torch
# pip install transformers
# pip install datasets
# pip install gguf (optional)
# python -m datasets-cli download refinedweb
# python -m datasets-cli convert refinedweb
# CUDA-enabled GPU (if you want to use GPU acceleration)
# Set CUDA_VISIBLE_DEVICES environment variable to the ID of your GPU (if you want to use GPU acceleration)
# python immortality_llm.py
#
# project_root/
# │
# ├── llm.py
# ├── datasets/
# │   ├── dataset1/
# │   ├── dataset2/
# │   └── dataset3/

import torch
import torch.nn as nn
import torch.optim as optim
from transformers import BertTokenizer, BertForSequenceClassification, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset, concatenate_datasets
from torch.utils.data import Dataset, DataLoader
import os
import logging
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)
# Define the model architecture
class MyLLM(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_heads, num_layers, dropout):
        super(MyLLM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_heads,
                                                   dim_feedforward=hidden_size, dropout=dropout)
        # Stack num_layers copies of the encoder layer
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids):
        x = self.embedding(input_ids)
        x = x.permute(1, 0, 2)  # TransformerEncoder expects (seq_len, batch, hidden)
        x = self.transformer(x)
        x = x.permute(1, 0, 2)  # back to (batch, seq_len, hidden)
        x = self.fc(x)          # per-token logits over the vocabulary
        return x
# Define the dataset class
class MyDataset(Dataset):
    def __init__(self, dataset, tokenizer, max_len):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, idx):
        example = self.dataset[idx]
        encoding = self.tokenizer.encode_plus(
            example['text'],          # adjust the key to the dataset's text column if it differs
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',     # fixed-length sequences so batches can be stacked
            return_attention_mask=True,
            return_tensors='pt'
        )
        input_ids = encoding['input_ids'].flatten()
        return {
            'input_ids': input_ids,
            'attention_mask': encoding['attention_mask'].flatten(),
            # Language-modelling target: predict the next token, so labels are the input ids themselves
            'labels': input_ids.clone()
        }

    def __len__(self):
        return len(self.dataset)
# Load the dataset
dataset = load_dataset('refinedweb', split='train')
# Create the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Create the dataset instance
my_dataset = MyDataset(dataset, tokenizer, max_len=512)
# Create the data loader
batch_size = 32
data_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=True)
# Initialize the model
model = MyLLM(vocab_size=len(tokenizer), hidden_size=256, num_heads=8, num_layers=6, dropout=0.1)
# Initialize the optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-5)
# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
for epoch in range(5):
    model.train()
    total_loss = 0
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)  # kept for completeness; this minimal model does not use it
        labels = batch['labels'].to(device)
        optimizer.zero_grad()
        outputs = model(input_ids)  # (batch, seq_len, vocab_size) logits
        # Next-token prediction: shift logits and labels by one position, ignore padding, flatten for CrossEntropyLoss
        loss = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)(
            outputs[:, :-1, :].reshape(-1, outputs.size(-1)), labels[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}')
# Save the model
save_dir = './my_llm'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
tokenizer.save_pretrained(save_dir)
# Retrain the model
# Load the saved model and tokenizer
model.load_state_dict(torch.load(os.path.join(save_dir, 'model.pth')))
tokenizer = BertTokenizer.from_pretrained(save_dir)
# Create a new dataset instance
new_dataset = MyDataset(dataset, tokenizer, max_len=512)
# Create a new data loader
new_data_loader = DataLoader(new_dataset, batch_size=batch_size, shuffle=True)
# Retrain the model
for epoch in range(5):
    model.train()
    total_loss = 0
    for batch in new_data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)  # unused by this minimal model
        labels = batch['labels'].to(device)
        optimizer.zero_grad()
        outputs = model(input_ids)
        # Same next-token prediction loss as the first training pass
        loss = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)(
            outputs[:, :-1, :].reshape(-1, outputs.size(-1)), labels[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {total_loss / len(new_data_loader)}')
# Use Hugging Face format
# Note: AutoModel/AutoTokenizer can only load save_dir if a Hugging Face model was saved there
# with save_pretrained(); the custom MyLLM above is stored as a raw state_dict and has no config,
# so it cannot be loaded this way without first being exported in Hugging Face format.
model = AutoModelForSequenceClassification.from_pretrained(save_dir)
tokenizer = AutoTokenizer.from_pretrained(save_dir)
# Use GGUF format
# The gguf package works at the file-format level (gguf.GGUFReader / gguf.GGUFWriter); it does not
# provide load_model()/load_tokenizer() helpers, so the checkpoint must first be converted to a
# .gguf file (for example with llama.cpp's conversion scripts) before it can be read.
import gguf
reader = gguf.GGUFReader(os.path.join(save_dir, 'model.gguf'))  # assumes such a converted file exists