Immortality LLM


Aim: The aim of Immortality LLM is to build an LLM capable of making human beings immortal. That means understanding physiology and carrying out lab work toward that end. For this, the LLM must be super-intelligent.

Plan: The model comprises every category in which A.I. has surpassed human ability. When a new category exceeds human capability, it is added to the model; otherwise it is not added and goes back to human-led development of the LLM. This is an attempt to widen narrow A.I. into something more general, and as each ability exceeds human level the model approaches S.I.
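A minimal sketch of this gating rule, in Python. Every name and number here (Category, human_baseline, the example scores) is illustrative only and not part of any existing system:

# Hypothetical category-gating rule: a category is only folded into the model
# once the system's score exceeds the human baseline for that category.
from dataclasses import dataclass

@dataclass
class Category:
    name: str
    human_baseline: float   # e.g. Elo rating or benchmark score for top humans
    model_score: float      # our system's current score on the same benchmark

def update_model_categories(categories, model_categories):
    # Add a category to the model only when it exceeds human capability;
    # otherwise it stays in human-led development of the LLM.
    for cat in categories:
        if cat.model_score > cat.human_baseline:
            model_categories.add(cat.name)
        else:
            print(f"{cat.name}: below human baseline, keep developing")
    return model_categories

# Illustrative values only.
cats = [Category("chess", human_baseline=2800, model_score=3500),
        Category("go", human_baseline=3800, model_score=5200),
        Category("jeopardy", human_baseline=0.85, model_score=0.60)]
print(update_model_categories(cats, set()))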

Any method that improves LLMs is wanted, but methods that improve them the most, and repeatably, are preferred.

Milestone 1 Capacities:

  • Chess Master
  • Go Master
  • Texas Hold'em Poker Master
  • Jeopardy Master (not part of Milestone 1, as it requires too much data at this stage)
  • Add more games

Category 1: Master of Games

Our System

  1. The model is asked about chess and generates a Python program to train itself in a simulated environment.
  2. It appends to and alters its training data with the new information (training generates new training data).
  3. It hits the re-train button on itself.
  4. It has learned. (Automate the process and give it resources.)

It keeps doing this to improve its ability to, for example, play chess.
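A minimal sketch of that loop follows. Every helper here (generate_experiment, run_experiment, retrain) is a hypothetical stub standing in for a capability the model is supposed to have; none of them is an existing API:

# Sketch of the self-improvement loop in "Our System". All helpers are stubs.
def generate_experiment(model_state, topic):
    # Stand-in for the model writing a Python program that trains it on `topic`.
    return f"# experiment code for {topic}"

def run_experiment(experiment_code):
    # Stand-in for running the generated program in a simulated environment
    # and collecting the training examples it produces.
    return [{"text": "example game record", "source": experiment_code}]

def retrain(model_state, training_data):
    # Stand-in for "hitting the re-train button": fine-tune on the updated data.
    return {"weights": model_state["weights"] + 1, "data_size": len(training_data)}

def self_training_loop(model_state, topic, training_data, rounds=3):
    for _ in range(rounds):
        code = generate_experiment(model_state, topic)     # 1. model designs the experiment
        new_data = run_experiment(code)                     # 2. experiment yields new training data
        training_data.extend(new_data)                      # 3. append/alter the training data
        model_state = retrain(model_state, training_data)   # 4. re-train on the result
    return model_state, training_data

model_state, data = self_training_loop({"weights": 0}, "chess", [])
print(model_state)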

Ability of model:

  1. generate the pertinent experiment for self-training when prompted.
  2. output usable, effective training data
  3. generate Python code that runs the simulation. Rather than building the simulation from scratch each time, provide it with functions such as init_chess() from a simulated environment. It interfaces with a workbench that sets up testing facilities with single function calls and ensures the output data is effective training data (see the sketch after this list).
  4. go through its existing training data and improve it with new training data produced.
  5. hit its own re-train button when changes reach a threshold.
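A minimal sketch of what such a workbench interface could look like. init_chess() comes from the list above; everything else (Workbench, run(), validate()) is invented for illustration and is not an existing library:

# Hypothetical workbench interface: single function calls that set up a test
# environment and validate that the produced data is usable training data.
import json
import random

class Workbench:
    def init_chess(self):
        # Stand-in for spinning up a simulated chess environment.
        return {"env": "chess", "board": "startpos"}

    def run(self, env, episodes=10):
        # Stand-in for self-play in the environment; returns candidate training records.
        return [{"text": f"game {i}: moves ...", "result": random.choice(["1-0", "0-1", "1/2-1/2"])}
                for i in range(episodes)]

    def validate(self, records):
        # Ensure output data is effective training data: well-formed, non-empty text.
        return [r for r in records if isinstance(r.get("text"), str) and r["text"].strip()]

wb = Workbench()
env = wb.init_chess()                 # one call sets up the testing facility
records = wb.validate(wb.run(env))    # only validated records become training data
print(json.dumps(records[:2], indent=2))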

The workbench can cover everything, not just A.I.: for instance, the problem might be an error in some code, a request to install a particular Linux distro and version, and more. We understand the limitations of this system, but it is what we have at the moment. As we tackle problems that do not lend themselves to computer testing, we develop them to become more computer-solvable, e.g. computerized physiology and medicine and computerized lab work.

Building an LLM

# pip install torch
# pip install transformers
# pip install datasets
# pip install gguf (optional)
# python -m datasets-cli download refinedweb
# python -m datasets-cli convert refinedweb
# CUDA-enabled GPU (if you want to use GPU acceleration)
# Set CUDA_VISIBLE_DEVICES environment variable to the ID of your GPU (if you want to use GPU acceleration)
# python immortality_llm.py
#
# project_root/
# │
# ├── llm.py
# ├── datasets/
# │   ├── dataset1/
# │   ├── dataset2/
# │   └── dataset3/

import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from datasets import load_dataset
from transformers import BertTokenizer

# Define the model architecture
class MyLLM(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_heads, num_layers, dropout):
        super(MyLLM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        # Stack num_layers Transformer encoder layers
        encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_heads,
                                                   dim_feedforward=hidden_size, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids):
        x = self.embedding(input_ids)
        x = x.permute(1, 0, 2)  # TransformerEncoder expects seq_len first
        x = self.transformer(x)
        x = x.permute(1, 0, 2)  # Change back to batch first
        x = self.fc(x)          # per-token logits over the vocabulary
        return x

# Define the dataset class
class MyDataset(Dataset):
    def __init__(self, dataset, tokenizer, max_len):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, idx):
        example = self.dataset[idx]
        encoding = self.tokenizer.encode_plus(
            example['text'],
            add_special_tokens=True,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt'
        )
        input_ids = encoding['input_ids'].flatten()
        return {
            'input_ids': input_ids,
            'attention_mask': encoding['attention_mask'].flatten(),
            # Language-modelling target: the model predicts each next token,
            # so the labels are the input tokens themselves (shifted in the loss).
            'labels': input_ids.clone()
        }

    def __len__(self):
        return len(self.dataset)

# Load the dataset (assumes a dataset named 'refinedweb' with a 'text' field is available)
dataset = load_dataset('refinedweb', split='train')

# Create the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Create the dataset instance
my_dataset = MyDataset(dataset, tokenizer, max_len=512)

# Create the data loader
batch_size = 32
data_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model
model = MyLLM(vocab_size=len(tokenizer), hidden_size=256, num_heads=8, num_layers=6, dropout=0.1)

# Initialize the optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-5)

# Ignore padding tokens when computing the loss
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)

# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(5):
    model.train()
    total_loss = 0
    for batch in data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)  # available for masking; padding is handled via ignore_index
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids)  # (batch, seq_len, vocab_size) logits
        # Next-token objective: predict token t+1 from the logits at position t
        loss = criterion(outputs[:, :-1].reshape(-1, outputs.size(-1)),
                         labels[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}')

# Save the model
save_dir = './my_llm'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
torch.save(model.state_dict(), os.path.join(save_dir, 'model.pth'))
tokenizer.save_pretrained(save_dir)

# Retrain the model
# Load the saved model and tokenizer
model.load_state_dict(torch.load(os.path.join(save_dir, 'model.pth'), map_location=device))
tokenizer = BertTokenizer.from_pretrained(save_dir)

# Create a new dataset instance (the same data is reused here; in practice this
# would be the training data updated by the self-training loop)
new_dataset = MyDataset(dataset, tokenizer, max_len=512)

# Create a new data loader
new_data_loader = DataLoader(new_dataset, batch_size=batch_size, shuffle=True)

# Retrain the model
for epoch in range(5):
    model.train()
    total_loss = 0
    for batch in new_data_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids)
        loss = criterion(outputs[:, :-1].reshape(-1, outputs.size(-1)),
                         labels[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {total_loss / len(new_data_loader)}')

# Use Hugging Face format
# Loading the model with the transformers Auto classes requires it to have been saved
# in the Hugging Face format (a config.json plus weights, e.g. via save_pretrained on
# a transformers model class); the raw state_dict saved above is not enough on its own.
from transformers import AutoModel, AutoTokenizer
# model = AutoModel.from_pretrained(save_dir)
tokenizer = AutoTokenizer.from_pretrained(save_dir)

# Use GGUF format
# The gguf package provides GGUFReader/GGUFWriter for reading and writing GGUF files;
# it has no direct load_model()/load_tokenizer() helpers. Converting the model to GGUF
# is normally done with a separate conversion script (e.g. from llama.cpp) first.
import gguf
# reader = gguf.GGUFReader(os.path.join(save_dir, 'model.gguf'))

  
