CodeWarrior /
npub1yxn…y9wl
2023-05-02 18:43:08


LICENSE = \
"""
MIT License

Copyright (c) 2023 SCHNORR_PUBKEY_89813cade9dd320d8dd5b7952c281fd314c6bcd2968a3476286232ed5fdb5554

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""


README = \
"""
NGB MONOLITH v1

This is the nostr-git-bot. It can be used to automatically serialize entire git repos
into the `content` field of a kind 0 `nostr` message.

There isn't much magic here, but it's robust and requires only NIP-01.

I recommend that:
0. Repos with a single admin give each repo its own keypair.
1. Repos with >1 admin use whichever multisig solution becomes popular on nostr,
with the repo itself still having its own nostr identity to avoid higher
NIP requirements for people trying to download/contribute.


MONOLITH NOTES
...
"""

USAGE = \
"""

You can use this to upload a repo to the nostr network with a client that
allows straightforward access to ALL kind-0 fields (gossip works as of 2023 April).
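
For reference, a NIP-01 kind-0 event has roughly this shape; its `content` is
itself a stringified JSON object (the `repo` field below is only an illustration
of where the serialized repo could go, not a standard field):

    kind    : 0
    pubkey  : <hex pubkey>
    content : a JSON string, eg {"name": ..., "about": ..., "repo": <serialized repo>}
    ...plus id, created_at, tags, sig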

Alternatively, you can use a popular text field in any client such as the
"about/description" field, but it might look weird.

It is possible that some relays will limit the length of kind-0-s in the future,
requiring POW or SATs to post long git repos, but who knows...

Usage with gossip...

Uploading:
from ngb import *
ngp = NostrGitParser()

# absolute path to the root of the repo, ending with a separator
ngp.set_repo_path(PATH_TO_YOUR_REPO)

ngp.parse_repo()

# DESIRED_STRING_FILE will be created, but the directory has to exist
with open(DESIRED_STRING_FILE, 'w') as f:
    f.write( ngp.quick_to_string() )

Now open the DESIRED_STRING_FILE, manually remove the starting and ending
double quotes, and copy the rest (starting w/ '{' and ending w/ '}') to a field
in your kind-0. That's it.

Cloning:
Find the repo on nostr... copy-paste the text from the relevant kind-0 field
to a text file on your computer, let's call that file FILE_TO_READ_FROM.
Add a double-quote manually at the beginning of the text, and one to the very end.
Save the file with the quotes... Then run the following:

from ngb import *
with open(FILE_TO_READ_FROM, 'r') as f:
    text_content = f.read()
ngp = NostrGitParser()
ngp.quick_from_string(text_content)
ngp.clone_from_self(DESIRED_CLONE_FOLDER_THAT_DOESNT_EXIST_YET)
"""


import os
import json
import subprocess

# --------------------------------------------------------------
# BEGIN monolith --- replace some imports with inline stuff

# ----------
# BEGIN LAFF


def list_filenames_in_folder(abs_folder_path):
"""
Get all file names from `folder`, and simply return it as a list of strings.
Paths must be absolute.

Tip:
Use foxre.relist(pattern{str}, in_list) to filter!
"""

# ensure `folder` is an absolute path
if not os.path.isabs(abs_folder_path):
raise Exception(
"`return_absolute_filenames=True` requires absolute path..."
" got relative, aborting..."
)

# return the array of files concatenated to the folder name
return [
os.path.join(abs_folder_path, fname)
for fname
in os.listdir(abs_folder_path)
]


def _folder_recursion(
    current_root,
    storage_list,
    include_root=True,
    ignore_hidden=True,
    ignored_folder_names = []
):
    """
    Direct calls obsolete. Use `list_subfolders_recursively` instead.

    Fills storage_list with the absolute paths of folders
    traversed down recursively from current_root.

    Usage:
    lst = list()
    _folder_recursion(some_path, lst)
    """

    # append root if necessary
    if include_root:
        storage_list.append(current_root)

    # create an auxiliary list to store filtered subfolders
    abs_subdirs_now = []

    # over all subdirectories (these are relative names)...
    for rel_subdir in os.listdir(current_root):

        # ignore files
        if not os.path.isdir(os.path.join(current_root, rel_subdir)):
            continue

        # ignore hidden as per settings
        if ignore_hidden and rel_subdir[0] in "._":
            continue

        # ignore folders passed in via `ignored_folder_names`
        if rel_subdir in ignored_folder_names:
            continue

        # everything passed, add subdir at this step as absolute path
        abs_subdirs_now.append( os.path.join(current_root, rel_subdir) )


    # add each subdir to the storage list and call this function recursively on it...
    for abs_subdir in abs_subdirs_now:
        storage_list.append(abs_subdir)
        _folder_recursion(
            abs_subdir,
            storage_list,
            include_root=False,
            ignore_hidden=ignore_hidden,
            ignored_folder_names=ignored_folder_names
        )



def list_subfolders_recursively(abs_root, include_root=True):
"""
Returns a list of all subfolders obtained recursively.
Writes absolute paths. Requires absolute root.

This function is just a wrapper around `_folder_recursion`
to allow simple x=fun(y) usage.
"""

# make sure passed path is absolute...
if not os.path.isabs(abs_root):
raise Exception("`abs_root` must be an absolute path!")

# do `folder_recursion` into a new list and return the list...
lst = list()
_folder_recursion(abs_root, lst, include_root=include_root)
return lst


def list_files_recursively(abs_root, include_root=True, list_folders=False):
"""
Returns a list of all files (or files and folders) within subfolders.
Requires absolute path.
"""

# make sure passed path is absolute...
if not os.path.isabs(abs_root):
raise Exception("`abs_root` must be an absolute path!")

# list to return
to_return = list()

# generate recursive list of subsolders
subfolders = list_subfolders_recursively(abs_root, include_root)

# for each subfolder...
for folder in subfolders:

# get relative file names in directory...
file_candidates = os.listdir(folder)

# transform relative filenames to absolute filenames
file_candidates = [
os.path.join(folder, relname)
for relname
in file_candidates
]

# filtering...
for file_candidate in file_candidates:

# folders not requested and candidate is folder --> skip
if list_folders == False and os.path.isdir(file_candidate):
continue

# add candidate to return array...
to_return.append(file_candidate)

# done
return to_return



# END LAFF
# ----------


# ------------
# BEGIN PRINT

class cprint():
"""
A class for color (or otherwise formatted) printing in terminal.
Currently supports the following class-methods:

__class__.red_print(*args, **kwargs)
__class__.green_print(*args, **kwargs)
__class__.blue_print(*args, **kwargs)
__class__.yellow_print(*args, **kwargs)
__class__.grey_print(*args, **kwargs)
__class__.cyan_print(*args, **kwargs)
__class__.magenta_print(*args, **kwargs)

You can define others by adding functions.
"""
_DEFAULT = '\033[99m'
_WHITE = '\033[97m'
_CYAN = '\033[96m'
_MAGENTA = '\033[95m'
_BLUE = '\033[94m'
_YELLOW = '\033[93m'
_GREEN = '\033[92m'
_RED = '\033[91m'
_GREY = '\033[90m'
_UNDERLINE = '\033[4m'
_BOLD = '\033[1m'
_ENDC = '\033[0m'

@classmethod
def _prototype(self, flag, *args, **kwargs):
print(flag, end="")
print(*args, **kwargs)
print(self._ENDC, end="")
return "" # necessary so that wrapping inside another
# print won't print the returned None

@classmethod
def red_print(self, *args, **kwargs):
self._prototype(self._RED, *args, **kwargs)

@classmethod
def green_print(self, *args, **kwargs):
self._prototype(self._GREEN, *args, **kwargs)

@classmethod
def blue_print(self, *args, **kwargs):
self._prototype(self._BLUE, *args, **kwargs)

@classmethod
def yellow_print(self, *args, **kwargs):
self._prototype(self._YELLOW, *args, **kwargs)

@classmethod
def grey_print(self, *args, **kwargs):
self._prototype(self._GREY, *args, **kwargs)

@classmethod
def cyan_print(self, *args, **kwargs):
self._prototype(self._CYAN, *args, **kwargs)

@classmethod
def magenta_print(self, *args, **kwargs):
self._prototype(self._MAGENTA, *args, **kwargs)
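
    # (usage is simply e.g. cprint.green_print("all good") or
    #  cprint.yellow_print("warning:", some_value); same arguments as print())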

# END PRINT
# -----------


# END monolith
# --------------------------------------------------------------



def is_binary(abs_file_path : str) -> bool:
"""
Try to determine if a file is binary by checking if it contains the zero byte.
Will detect UTF-16 as binary.
There is an interesting discussion on SO regarding the issue... bottom line is,
git diff uses the same method to determine whether a file is binary.

UPDATE
Turns out this doesn't work like half the time, so I implemented a fallback
in `fill_dict_with_file_from_relpath()` in case we get a `UnicodeDecodeError`
"""
with open(abs_file_path, 'rb') as f:
chunk_size = 256
while True:
next_chunk = f.read(chunk_size)
if b'\0' in next_chunk:
return True
if len(next_chunk) == 0:
return False


def make_byte_size_human_readable(size_in_bytes : int) -> str:
""" Turn a byte size human readable... """
if size_in_bytes < 1000:
return f"{size_in_bytes} bytes"
elif size_in_bytes < 1000 ** 2:
return f"{float(size_in_bytes) / float(1000 ** 1):.1f} KB"
elif size_in_bytes < 1000 ** 3:
return f"{float(size_in_bytes) / float(1000 ** 2):.1f} MB"
else:
return f"{float(size_in_bytes) / float(1000 ** 3):.1f} GB"


def binary_file_to_string(
    abs_path : str,
    transcoding : str
) -> str:
    """
    Converts a binary to string for transporting over json as plain text.
    Complement is `string_to_bytes()`.
    Use the matching encoding when writing back to binary.

    DEVELOPMENT
    Decided to just hex the binaries for now...
    A later version might implement other ways.
    """
    with open(abs_path, 'rb') as f:
        bytes_content = f.read()

    # hex override until stable
    str_version = bytes_content.hex()
    #str_version = bytes_content.decode(transcoding)
    return str_version
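
# (round-trip note: `bytes.fromhex(some_bytes.hex()) == some_bytes`, which is
#  what `_make_file_from_dict_and_relpath()` relies on when writing binaries back)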

# for debugging, switched to hex transcoding only
# def string_to_bytes(
# arg_str : str,
# transcoding : str
# ) -> bytes:
# """ Complements binary_file_to_string. """
# return arg_str.encode(transcoding)


class NostrGitParser():
"""
Class that will serialize / deserialize the relevant data for the
repo.
"""

def __init__(self):

# core config...
self._repo_path = None
self.meta = {}
# HEX OVERRIDE USED FOR DEVELOPMENT
self.uniform_binary_transcoding = "latin-1"

# automatically filled... folders do not count as files!!!
self.repo_files = {}
self.repo_folders = {}
self.git_files = {}
self.git_folders = {}


# auxiliaries
# stores the repo filename array for further processing...
self.__repo_fileames_as_list = []


    def set_repo_path(self, desired_repo_path : str) -> bool:
        """
        Sets the repo path to a new value. Returns success bool.

        New path must:
        - be absolute
        - end with a separator ('/' usually, if not, rethink your life choices)
        - be an initialized git repo or look like one (ie have a '.git' subfolder)
        """

        # make sure path is absolute
        if not os.path.isabs(desired_repo_path):
            cprint.yellow_print("`desired_repo_path` should be an absolute path")
            return False


        # make sure path ends with the OS specific path separator
        if not desired_repo_path.endswith(os.sep):
            cprint.yellow_print(
                f"`desired_repo_path` must end with the OS specific separator... aborting\n"
                f" Separator for your platform is : '{os.sep}'"
            )
            return False


        # make sure path is a folder
        if not os.path.isdir(desired_repo_path):
            cprint.yellow_print("`desired_repo_path` is not a directory... aborted")
            return False


        # make sure path is a folder w/ a `.git` subfolder inside
        # (the path already ends with the separator, so just append '.git')
        if not os.path.isdir( desired_repo_path + ".git" ):
            cprint.yellow_print(
                "`desired_repo_path` doesn't seem to have a '.git' subfolder... aborting"
            )
            return False


        # log old value if overridden (both `None` and "" are treated as False)
        if self._repo_path:
            cprint.blue_print(
                f"Overriding old path:\n"
                f" {self._repo_path}"
            )

        # finally change the path...
        self._repo_path = desired_repo_path

        # ...and log...
        cprint.green_print(
            f"Repo path successfully set to:\n"
            f" {self._repo_path}"
        )

        # ... and signal success
        return True



    def parse_repo(self) -> bool:
        """
        Discover the files which are tracked in the repo.
        Read the content of the files into self.
        Also read the necessary folder names to recreate everything.

        Returns the success bool.
        """

        # ensure `_repo_path` is valid
        # if valid, `set_repo_path()` will have ensured it's a git repo (or looks like one)...
        if not self._repo_path:
            cprint.yellow_print(
                "Please set repo path before attempting repo discovery. Aborting..."
            )
            return False

        # reset relevant arrays and dicts
        self.repo_files = {}
        self.repo_folders = {}
        self.git_files = {}
        self.git_folders = {}
        self.__repo_filenames_as_list = []

        # -------------------------------------------
        # BEGIN perform system call to get file names as array

        # make shell command that outputs names of all tracked repo files
        command_to_run = (
            f"git --git-dir={self._repo_path + '.git'} ls-files"
        )


        # returns a `subprocess.CompletedProcess`
        completed_process = subprocess.run(
            command_to_run,
            shell=True,
            capture_output=True,
            text=True
        )

        # process errored out? log and abort
        if completed_process.stderr:
            cprint.yellow_print(
                "git --git-dir=... ls-files errored out as follows:"
            )
            cprint.grey_print(completed_process.stderr)
            cprint.yellow_print("Aborted.")
            return False

        # completed_process.stdout will be of the form:
        # "FILEPATH1\nFILEPATH2\n...FILEPATHN\n"

        # split by newlines... lhs is `list`
        filenames = completed_process.stdout.split('\n')

        # note that the expected output ends in a newline, so the split adds '' as the last item
        # verify that the empty string is the last list elem, and remove it..
        if filenames[-1] == '':
            filenames.pop()

        # DEBUG print all filenames now
        cprint.blue_print("Tracked files:")
        for filename in filenames:
            cprint.grey_print(" " + filename)

        # store as aux in case we wish to refactor from this point...
        self.__repo_filenames_as_list = filenames


        # END perform system call to get file names as array
        # -------------------------------------------

        # are you paying attention? we're still in the function...
        # might be refactored into smaller, reusable parts later...

        # -------------------------------------------
        # BEGIN fill the `repo_files` dict...

        """
        Text files will be read and their content will be copy-pasted.
        Binaries are more challenging...
        0. How do I know which file is binary and which one is text?
        1. How should I encode binaries when everything is passed as raw text?

        Answers:
        0. I don't, I'm legit guessing.
        1. encode as hex for dev, later probably latin-1, idk if that would work
        """

        # for each local file path in the repo...
        for filename in self.__repo_filenames_as_list:

            # make first key, which is local file path
            self.repo_files[filename] = {}

            # fill subdict...
            self.fill_dict_with_file_from_relpath(
                dict_ref = self.repo_files[filename],
                relpath = filename
            )


        # END fill the `repo_files` dict...
        # -------------------------------------------


        # -------------------------------------------
        # BEGIN fill the `repo_folders` dict...

        # The folders that are part of the repo are actually extracted from the filenames

        # over all (local) filenames...
        for filename_key in self.repo_files:

            # ...try to extract directory path...

            # log
            cprint.magenta_print(
                f"Attempting to extract directory name from {filename_key}..."
            )

            # not in a local directory (ie is in root of repo)? --> no relevant folder
            if os.sep not in filename_key:
                continue

            # find start of last separator position in path & use it to get folder path...
            # separator will NOT be included in folder path
            last_separator_start = filename_key.rfind(os.sep) - ( len(os.sep) - 1 )
            folder_path = filename_key[:last_separator_start]
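            # (illustrative, assuming '/' as separator:
            #  "src/utils/helpers.py" -> "src/utils")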

            # now add folder path if not already in repo_folders dict...
            # one might be tempted to use a list for storing folder names
            # but since dicts are hashed, this is probably more efficient than
            # comparing a million who knows how long strings...
            if folder_path not in self.repo_folders:
                cprint.green_print(f" Extracted {folder_path}")
                self.repo_folders[folder_path] = {}

        # END fill the `repo_folders` dict...
        # -------------------------------------------


        # -------------------------------------------
        # BEGIN fill the `git_folders` dict...

        # in this case, we WILL store the names of empty folders too for good measure
        # remember, `self._repo_path` ends in `os.sep`...
        # lhs will be a list of absolute paths
        git_folders_as_list = list_subfolders_recursively(
            self._repo_path + ".git",
            include_root = True
        )

        # add keys to the relevant dict... declare sub-dictionaries under
        # make sure to cut from absolute path to relative path
        for git_abspath in git_folders_as_list:
            git_relpath = git_abspath[len(self._repo_path):]
            self.git_folders[git_relpath] = {}


        # END fill the `git_folders` dict...
        # -------------------------------------------

        # -------------------------------------------
        # BEGIN fill the `git_files` dict...


        # fetch non-folder files from the .git folder...
        # the list will contain absolute paths
        git_files_as_list = list_files_recursively(
            self._repo_path + ".git",
            include_root = True,
            list_folders = False
        )


        # now transform the absolute paths to relpaths wrt repo root...
        # fill git_relpaths_as_list with the relpaths
        #
        # Note: since we only cut off self._repo_path, the .git/ part remains and
        # later we can pass the relpath directly to `fill_dict_with_file_from_relpath()`
        git_relpaths_as_list = []
        for abs_path in git_files_as_list:
            git_relpaths_as_list.append(
                abs_path[len(self._repo_path):]
            )


        # put them in the right format...
        for git_relpath in git_relpaths_as_list:
            self.git_files[git_relpath] = {}
            self.fill_dict_with_file_from_relpath(
                dict_ref = self.git_files[git_relpath],
                relpath = git_relpath
            )


        # END fill the `git_files` dict...
        # -------------------------------------------

        # signal success
        return True



    def fill_dict_with_file_from_relpath(
        self,
        dict_ref : dict = {},
        relpath : str = ""
    ) -> None:
        """
        Process a file into a dict... writes several info fields and the `fcontent`,
        which contains the file's contents (either as is or transcoded from
        binary to text).

        Binary/Text distinction is made and considered when writing.
        """

        # create aux variable for absolute path
        abspath = self._repo_path + relpath

        # write important descriptors such as file size...
        dict_ref['size_numeric'] = os.path.getsize(abspath)
        dict_ref['size_human_readable'] = \
            make_byte_size_human_readable( os.path.getsize(abspath) )

        # if we predict the file not to be binary...
        if not is_binary(abspath):
            # ...try to read as text...
            try:
                cprint.grey_print(f"(TEXT) Attempting to read {abspath}")
                dict_ref['is_binary'] = False
                dict_ref['transcoding'] = "N/A"
                with open(abspath, 'r') as f:
                    dict_ref['fcontent'] = f.read()
                return
            # ... but if reading fails with `UnicodeDecodeError`, our prediction was probably
            # wrong, thus we fall through to the binary part
            except UnicodeDecodeError:
                cprint.yellow_print("Unicode decode error! Falling back to binary mode...")
                pass

        # read file as binary
        cprint.grey_print(f"(BINARY) Attempting to read {abspath}")
        dict_ref['is_binary'] = True
        #dict_ref['transcoding'] = self.uniform_binary_transcoding
        dict_ref['transcoding'] = "HEX DEBUGGING OVERRIDE"
        dict_ref['fcontent'] = binary_file_to_string(
            abspath,
            self.uniform_binary_transcoding
        )

        return


    def _make_file_from_dict_and_relpath(
        self,
        arg_target_path : str = "",
        dict_ref : dict = {},
        relpath : str = ""
    ) -> None:
        """
        Make a file from a dict and a relpath. Assumes the directory exists.
        Called by `clone_from_self()` which ensures this.
        """

        # absolute desired file path
        abspath = arg_target_path + relpath

        # write file
        if dict_ref['is_binary']:
            #bytes_to_write = dict_ref['fcontent'].encode( dict_ref['transcoding'] )
            bytes_to_write = bytes.fromhex(dict_ref['fcontent'])
            with open(abspath, 'wb') as f:
                f.write(bytes_to_write)
        else:
            with open(abspath, 'w') as f:
                f.write(dict_ref['fcontent'])

        # log
        print(f"New file written to {abspath}...")


    def clone_from_self(self, target_path : str, force : bool = False):
        """
        Clones the entire repo from data stored in self...

        `target_path`:
        0. must be absolute
        1. must end with an '/' or equivalent separator
        2. cannot be an existing directory, to prevent overwriting shit

        `force` can be set to `True` to allow overwriting anyway.

        Returns the success bool.
        """

        # ensure `target_path` ends in separator ('/')
        if not target_path.endswith(os.sep):
            cprint.yellow_print(
                "`target_path` in `clone_from_self()` must end in '/' or equivalent... aborting"
            )
            return False

        # ensure `target_path` is absolute
        if not os.path.isabs(target_path):
            cprint.yellow_print(
                "`target_path` in `clone_from_self()` must be absolute... aborting"
            )
            return False

        # ensure `target_path` isn't an existing directory
        if os.path.isdir(target_path) and not force:
            cprint.yellow_print(
                "`clone_from_self()` called with `target_path` pointing to an existing dir\n"
                " this is not allowed... aborting"
            )
            return False

        # create target path
        os.makedirs(target_path, exist_ok=True)

        # then make all git directories
        for git_folder_relpath in self.git_folders:
            os.makedirs(target_path + git_folder_relpath, exist_ok=True)

        # then make all repo directories
        for repo_folder_relpath in self.repo_folders:
            os.makedirs(target_path + repo_folder_relpath, exist_ok=True)

        # dump all git files...
        # `git_files` is a dict, git_file_relpath is also the relevant key
        for git_file_relpath in self.git_files:
            self._make_file_from_dict_and_relpath(
                arg_target_path = target_path,
                dict_ref = self.git_files[git_file_relpath],
                relpath = git_file_relpath
            )

        # dump all repo files
        # `repo_files` is a dict, repo_file_relpath is also the relevant key
        for repo_file_relpath in self.repo_files:
            self._make_file_from_dict_and_relpath(
                arg_target_path = target_path,
                dict_ref = self.repo_files[repo_file_relpath],
                relpath = repo_file_relpath
            )

        # signal success
        return True

    def to_dict(self):

        d = dict()

        # description data
        d['meta'] = self.meta

        # file data
        d['repo_files'] = self.repo_files
        d['repo_folders'] = self.repo_folders
        d['git_files'] = self.git_files
        d['git_folders'] = self.git_folders

        return d


    def from_dict(self, d):

        # description data
        self.meta = d['meta']

        # file data
        self.repo_files = d['repo_files']
        self.repo_folders = d['repo_folders']
        self.git_files = d['git_files']
        self.git_folders = d['git_folders']


    def quick_to_string(self):
        """
        Turns the repo into a string...
        """
        # must dump twice, because the 1st dump alone would yield unescaped top-level double-quotes
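        # (illustrative: json.dumps({"a": 1}) produces the text {"a": 1}, and
        #  dumping that string again produces "{\"a\": 1}", ie one escaped JSON
        #  string that can sit inside another JSON field without breaking it)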
        double_dumped = json.dumps( json.dumps(self.to_dict()) )
        cprint.green_print("Generated double-dumped string version...")
        return double_dumped


    def quick_from_string(self, arg_str):
        # double load, see `quick_to_string()` for explanation
        self.from_dict(json.loads(json.loads(arg_str)))
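

# --------------------------------------------------------------
# A minimal end-to-end sketch of the upload/clone flow described in USAGE.
# Not part of the bot itself; the paths below are hypothetical placeholders
# (and assume a POSIX-style '/' separator), adjust them before running.
if __name__ == "__main__":

    import tempfile

    MY_REPO  = os.path.expanduser("~/my_repo/")                            # existing git repo, ends with os.sep
    MY_DUMP  = os.path.join(tempfile.gettempdir(), "repo_as_string.txt")   # serialized repo goes here
    MY_CLONE = os.path.join(tempfile.gettempdir(), "repo_clone") + os.sep  # must not exist yet

    # serialize the repo and dump the double-dumped string to a file...
    ngp = NostrGitParser()
    if ngp.set_repo_path(MY_REPO) and ngp.parse_repo():
        with open(MY_DUMP, 'w') as f:
            f.write(ngp.quick_to_string())

    # ...then rebuild a clone from that file
    # (when copying from a kind-0 field instead, remember to add the surrounding
    #  double quotes back manually, see USAGE)
    with open(MY_DUMP, 'r') as f:
        ngp2 = NostrGitParser()
        ngp2.quick_from_string(f.read())
        ngp2.clone_from_self(MY_CLONE)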