Lex ...

npub1m3u…70lp

2024-08-29 01:01:34

Lex Fridman的播客经常会采访一些大牛，我在Kaggle上搜到一个数据集，是一个收录了300多期访谈文字稿的CSV文件。
https://t.co/sHNLx6neMx

下载后让Claude写了个Python程序，把每期访谈生成一个txt文件（共300多个），再上传到NotebookLM使用。

生成文件Python代码：

import csv
import os
import sys

# Raise the csv module's per-field size cap so very long transcript cells can
# be parsed. sys.maxsize does not fit in a C long on every platform (e.g.
# 64-bit Windows), where csv.field_size_limit raises OverflowError, so halve
# the requested value until it is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 2

def create_txt_files_from_csv(csv_file_path):
    """Write one UTF-8 ``.txt`` file per CSV row into ``output_txt_files``.

    Each row's ``text`` column becomes the file content. The file name is the
    row's ``title`` reduced to letters, digits and spaces; when nothing is
    left, ``file_<id>`` is used (or ``file_<row number>`` if the row has no
    usable ``id``). Rows whose names would clash get a ``_2``, ``_3``, ...
    suffix so that no row overwrites another row's file.

    Args:
        csv_file_path: Path to a UTF-8 CSV file whose header row includes
            ``title`` and ``text`` columns.

    Raises:
        KeyError: If the CSV header has no ``title`` or ``text`` column.
        OSError: If the CSV cannot be opened or an output file cannot be
            written.
    """
    # Make sure the output directory exists.
    output_dir = 'output_txt_files'
    os.makedirs(output_dir, exist_ok=True)

    # File stems already used in this run, case-folded so that names differing
    # only by case do not clobber each other on case-insensitive filesystems.
    used_names = set()

    with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:
        for row_number, row in enumerate(csv.DictReader(csvfile), start=1):
            # DictReader fills cells missing from a short row with None.
            title = row['title'] or ''
            text = row['text'] or ''

            # Keep only characters that are safe in a file name.
            safe_title = "".join(
                c for c in title if c.isalpha() or c.isdigit() or c == ' '
            ).rstrip()

            # Nothing usable left in the title: fall back to the row's id,
            # or to its position when there is no id.
            if not safe_title:
                safe_title = f"file_{row.get('id') or row_number}"

            # Disambiguate rows that map to an already-used name.
            unique_title = safe_title
            duplicate_count = 1
            while unique_title.casefold() in used_names:
                duplicate_count += 1
                unique_title = f"{safe_title}_{duplicate_count}"
            used_names.add(unique_title.casefold())

            file_path = os.path.join(output_dir, f"{unique_title}.txt")

            with open(file_path, 'w', encoding='utf-8') as txtfile:
                txtfile.write(text)

            print(f"Created file: {file_path}")

# Run the conversion on the downloaded dataset.
csv_file_path = "podcastdata_dataset.csv"  # replace with the path to your CSV file
create_txt_files_from_csv(csv_file_path)

Author Public Key

npub1m3uw6cg4fysn0uq288mfgzx8zgx3vtckgdjj9df6km4m9rqkffhqpv70lp

Show more details

Published at

2024-08-29 01:01:34

Kind type

1 Short Text Note

Event JSON

{ "id": "b36e44c8bc67ad883b42bd31b2ac27ad2330d170546ad8e45e5b5b12f043435a", "pubkey": "dc78ed6115492137f00a39f69408c7120d162f16436522b53ab6ebb28c164a6e", "created_at": 1724893294, "kind": 1, "tags": [], "content": "Lex Fridman的播客经常会采访一些大牛，搜到个Kaggle上的数据集，有300多期访谈CSV文件。\nhttps://t.co/sHNLx6neMx\n\n下载后让Claude写个Python程序生成300个txt文件，传到notebookLM使用。\n\n生成文件Python代码：\n\nimport csv\nimport os\nimport sys\n\n# 增加CSV字段大小限制\ncsv.field_size_limit(sys.maxsize)\n\ndef create_txt_files_from_csv(csv_file_path):\n # 确保输出目录存在\n output_dir = 'output_txt_files'\n os.makedirs(output_dir, exist_ok=True)\n \n # 读取CSV文件\n with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:\n csv_reader = csv.DictReader(csvfile)\n \n # 遍历CSV的每一行\n for row in csv_reader:\n # 获取title和text\n title = row['title']\n text = row['text']\n \n # 创建安全的文件名（移除不允许的字符）\n safe_title = \"\".join([c for c in title if c.isalpha() or c.isdigit() or c==' ']).rstrip()\n \n # 如果文件名为空，使用id作为文件名\n if not safe_title:\n safe_title = f\"file_{row['id']}\"\n \n # 创建文件路径\n file_path = os.path.join(output_dir, f\"{safe_title}.txt\")\n \n # 写入文本文件\n with open(file_path, 'w', encoding='utf-8') as txtfile:\n txtfile.write(text)\n \n print(f\"Created file: {file_path}\")\n\n# 使用函数\ncsv_file_path = 'podcastdata_dataset.csv' # 替换为你的CSV文件路径\ncreate_txt_files_from_csv(csv_file_path) https://pbs.twimg.com/media/GWG4KwPbsAA-Df4.jpg https://pbs.twimg.com/media/GWG4YyvasAAUD8N.jpg https://pbs.twimg.com/media/GWG4eS2bMAAcqE-.jpg", "sig": "507f09b27668c2d593e443764d4e23f5a56cd8672e3cd1ccdae9e852e370d86a3d5b0c3468091b5372c8f8681befdeafb7f1e7cfab131ef0db702da1fdbac579" }