INMP441 MicroPython WAV

This code records audio from an INMP441 I2S microphone and saves it as a WAV file.

I'm using the single LEFT channel (mono) for my I2S microphone. Adjust this for your implementation.

PIN CONFIGURATION:
SCK -> 4
WS -> 5
SD -> 18
LR -> 3 (You may also directly Ground the LR pin)

Example code

from machine import Pin
from machine import I2S


#======= USER CONFIGURATION =======
RECORD_TIME_IN_SECONDS = 3
SAMPLE_RATE_IN_HZ = 16000
filename = 'mic.wav'
#======= USER CONFIGURATION =======

WAV_SAMPLE_SIZE_IN_BITS = 16
WAV_SAMPLE_SIZE_IN_BYTES = WAV_SAMPLE_SIZE_IN_BITS // 8
MIC_SAMPLE_BUFFER_SIZE_IN_BYTES = 4096
SDCARD_SAMPLE_BUFFER_SIZE_IN_BYTES = MIC_SAMPLE_BUFFER_SIZE_IN_BYTES // 2 # why divide by 2? only using 16-bits of 32-bit samples
NUM_SAMPLE_BYTES_TO_WRITE = int(RECORD_TIME_IN_SECONDS * SAMPLE_RATE_IN_HZ * WAV_SAMPLE_SIZE_IN_BYTES)
NUM_SAMPLES_IN_DMA_BUFFER = 256
NUM_CHANNELS = 1

#====== FUNCTIONS =========
def snip_16_mono(samples_in, samples_out):
    num_samples = len(samples_in) // 4
    for i in range(num_samples):
        samples_out[2*i] = samples_in[4*i + 2]
        samples_out[2*i + 1] = samples_in[4*i + 3]
            
    return num_samples * 2

def create_wav_header(sampleRate, bitsPerSample, num_channels, num_samples):
    datasize = int(num_samples * num_channels * bitsPerSample // 8)
    o = bytes("RIFF",'ascii')                                                   # (4byte) Marks file as RIFF
    o += (datasize + 36).to_bytes(4,'little')                                   # (4byte) File size in bytes excluding this and RIFF marker
    o += bytes("WAVE",'ascii')                                                  # (4byte) File type
    o += bytes("fmt ",'ascii')                                                  # (4byte) Format Chunk Marker
    o += (16).to_bytes(4,'little')                                              # (4byte) Length of above format data
    o += (1).to_bytes(2,'little')                                               # (2byte) Format type (1 - PCM)
    o += (num_channels).to_bytes(2,'little')                                    # (2byte)
    o += (sampleRate).to_bytes(4,'little')                                      # (4byte)
    o += (sampleRate * num_channels * bitsPerSample // 8).to_bytes(4,'little')  # (4byte)
    o += (num_channels * bitsPerSample // 8).to_bytes(2,'little')               # (2byte)
    o += (bitsPerSample).to_bytes(2,'little')                                   # (2byte)
    o += bytes("data",'ascii')                                                  # (4byte) Data Chunk Marker
    o += (datasize).to_bytes(4,'little')                                        # (4byte) Data size in bytes
    return o


################## MAIN #########################

# Drive the L/R select pin (GPIO 3) low to select the LEFT channel (or tie LR directly to ground)
PIN_I2S_LR = 3
Pin(PIN_I2S_LR, Pin.OUT, value=0)
###############
audio_in = I2S(1,
    sck=Pin(4), ws=Pin(5), sd=Pin(18),
    mode=I2S.RX,
    bits=32,
    format=I2S.MONO,
    rate=SAMPLE_RATE_IN_HZ,
    ibuf=20000,
)

wav = open(filename,'wb')

# create header for WAV file and write it to the start of the file
wav_header = create_wav_header(
    SAMPLE_RATE_IN_HZ, 
    WAV_SAMPLE_SIZE_IN_BITS, 
    NUM_CHANNELS, 
    SAMPLE_RATE_IN_HZ * RECORD_TIME_IN_SECONDS
)
num_bytes_written = wav.write(wav_header)

# allocate sample arrays
#   memoryview used to reduce heap allocation in while loop
mic_samples = bytearray(MIC_SAMPLE_BUFFER_SIZE_IN_BYTES)
mic_samples_mv = memoryview(mic_samples)
wav_samples = bytearray(SDCARD_SAMPLE_BUFFER_SIZE_IN_BYTES)
wav_samples_mv = memoryview(wav_samples)

num_sample_bytes_written_to_wav = 0

print('Starting')
# read 32-bit samples from I2S microphone, snip upper 16-bits, write snipped samples to WAV file
while num_sample_bytes_written_to_wav < NUM_SAMPLE_BYTES_TO_WRITE:
    # try to read a block of samples from the I2S microphone
    # readinto() method returns 0 if no DMA buffer is full
    num_bytes_read_from_mic = audio_in.readinto(mic_samples_mv)
    if num_bytes_read_from_mic > 0:
        # snip upper 16-bits from each 32-bit microphone sample
        num_bytes_snipped = snip_16_mono(mic_samples_mv[:num_bytes_read_from_mic], wav_samples_mv)
        num_bytes_to_write = min(num_bytes_snipped, NUM_SAMPLE_BYTES_TO_WRITE - num_sample_bytes_written_to_wav)
        num_bytes_written = wav.write(wav_samples_mv[:num_bytes_to_write])
        num_sample_bytes_written_to_wav += num_bytes_written

wav.close()
audio_in.deinit()
print('Done')
print(f'{num_sample_bytes_written_to_wav} sample bytes written to WAV file')
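
To sanity-check the recording, here is a minimal sketch (desktop CPython, standard library only, assuming the file has been copied off the board as mic.wav) that reads the header back and reports the format:

import wave

# Open the recorded file and print the key header fields
with wave.open('mic.wav', 'rb') as w:
    print('channels:    ', w.getnchannels())            # expect 1 (mono)
    print('sample width:', w.getsampwidth(), 'bytes')   # expect 2 (16-bit)
    print('sample rate: ', w.getframerate(), 'Hz')      # expect 16000
    print('duration:    ', w.getnframes() / w.getframerate(), 's')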

SmolDocling-256M Installation

Environment setup

pip install torch transformers docling_core -i https://mirrors.aliyun.com/pypi/simple/

Download via the hf-mirror mirror (the default example below downloads the model automatically)

pip install transformers numpy huggingface_hub -i https://mirrors.aliyun.com/pypi/simple/

export HF_ENDPOINT=https://hf-mirror.com

huggingface-cli download ds4sd/SmolDocling-256M-preview --local-dir SmolDocling-256M
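
The same download can also be scripted from Python. A minimal sketch with huggingface_hub (assuming the hf-mirror endpoint and local directory used above):

import os

# Point huggingface_hub at the hf-mirror endpoint before importing it
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

from huggingface_hub import snapshot_download

# Download the full model snapshot into a local directory
snapshot_download(
    repo_id='ds4sd/SmolDocling-256M-preview',
    local_dir='SmolDocling-256M',
)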

Official example (the model is downloaded automatically from Hugging Face)

# Prerequisites:
# pip install torch
# pip install docling_core
# pip install transformers

import torch
from docling_core.types.doc import DoclingDocument
from docling_core.types.doc.document import DocTagsDocument
from transformers import AutoProcessor, AutoModelForVision2Seq
from transformers.image_utils import load_image
from pathlib import Path

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load images
image = load_image("https://upload.wikimedia.org/wikipedia/commons/7/76/GazettedeFrance.jpg")

# Initialize processor and model
processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
model = AutoModelForVision2Seq.from_pretrained(
    "ds4sd/SmolDocling-256M-preview",
    torch_dtype=torch.bfloat16,
    _attn_implementation="flash_attention_2" if DEVICE == "cuda" else "eager",
).to(DEVICE)

# Create input messages
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Convert this page to docling."}
        ]
    },
]

# Prepare inputs
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=prompt, images=[image], return_tensors="pt")
inputs = inputs.to(DEVICE)

# Generate outputs
generated_ids = model.generate(**inputs, max_new_tokens=8192)
prompt_length = inputs.input_ids.shape[1]
trimmed_generated_ids = generated_ids[:, prompt_length:]
doctags = processor.batch_decode(
    trimmed_generated_ids,
    skip_special_tokens=False,
)[0].lstrip()

# Populate document
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs([doctags], [image])
print(doctags)
# create a docling document
doc = DoclingDocument(name="Document")
doc.load_from_doctags(doctags_doc)

# export as any format
# HTML
# Path("Out/").mkdir(parents=True, exist_ok=True)
# output_path_html = Path("Out/") / "example.html"
# doc.save_as_html(output_path_html)
# MD
print(doc.export_to_markdown())
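
To keep the converted output instead of only printing it, a minimal sketch (assuming the doc object from the script above) that writes the Markdown and HTML exports into an Out/ directory:

from pathlib import Path

# Write both export formats side by side (uses the `doc` object created above)
out_dir = Path("Out")
out_dir.mkdir(parents=True, exist_ok=True)
(out_dir / "example.md").write_text(doc.export_to_markdown(), encoding="utf-8")
doc.save_as_html(out_dir / "example.html")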

RAGFlow Installation

1. Install Docker and docker-compose

Install Docker

sudo apt update && sudo apt install docker.io apparmor -y

Install docker-compose

sudo curl -L "https://github.com/docker/compose/releases/download/v2.20.3/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose

sudo chmod +x /usr/local/bin/docker-compose

Test it by running: docker-compose

Download RAGFlow

git clone https://github.com/infiniflow/ragflow.git

cd ragflow/docker

Install RAGFlow

docker-compose -f docker-compose.yml up -d

Check the running containers with: docker ps

Open http://IP in a browser to access the web UI.
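
If the page is not reachable right away, the services may still be starting. A minimal sketch (standard library only; http://IP is a placeholder, replace it with your server's address) that polls the web UI until it answers:

import time
import urllib.request

URL = 'http://IP'  # placeholder: replace with your server's address

# Poll until the RAGFlow web UI responds, or give up after about two minutes
for attempt in range(24):
    try:
        with urllib.request.urlopen(URL, timeout=5) as resp:
            print('web UI is up, HTTP', resp.status)
            break
    except Exception as exc:
        print('not ready yet:', exc)
        time.sleep(5)
else:
    print('gave up waiting for', URL)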

RAG setup with AnythingLLM (not recommended on Xeon E5 v2 or older CPUs, v2 included)

AnythingLLM can run as a local desktop app or be deployed as a web server.

Below is the web deployment (Docker required):
Docker on Ubuntu: sudo apt update && sudo apt install docker.io apparmor -y

AnythingLLM Docker deployment command:

export STORAGE_LOCATION=$HOME/anythingllm && \
mkdir -p $STORAGE_LOCATION && \
touch "$STORAGE_LOCATION/.env" && \
docker run -d -p 3001:3001 \
--cap-add SYS_ADMIN \
-v ${STORAGE_LOCATION}:/app/server/storage \
-v ${STORAGE_LOCATION}/.env:/app/server/.env \
-e STORAGE_DIR="/app/server/storage" \
mintplexlabs/anythingllm

Then open http://IP:3001.

Pitfalls:

E5 v2 CPUs are not supported: the model setup step on the first page throws an error and the Docker container goes down immediately.

A fresh 4-core / 8 GB Vultr instance runs it fine.

The anythingllm directory needs 777 permissions overall; steps below:

  • 1. The initial docker run fails;
  • 2. run chmod -R 777 anythingllm;
  • 3. then run the docker command again to start it.

docker logs shows the error: /usr/local/bin/docker-entrypoint.sh: line 7:   105 Illegal instruction     (core dumped) node /app/server/index.js

In the web settings you cannot proceed to the next step; the page shows the error:
Failed to save LLM settings: Failed to fetch

The default storage directory needs write permission; setting it to 777 fixes it: chmod -R 777 $HOME/anythingllm
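
A quick way to confirm the permissions before restarting the container is a minimal sketch (standard library only, assuming the $HOME/anythingllm storage path above):

import os
import stat

path = os.path.expanduser('~/anythingllm')

# Print the permission bits of the storage directory; expect 0o777 after chmod -R 777
mode = stat.S_IMODE(os.stat(path).st_mode)
print(path, 'permissions:', oct(mode))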

Deploying the Wan2.1 1.3B video generation model on a Mac, ERROR 250312

flash-attention only supports CUDA on x86; it cannot be installed on Apple's MPS!!!

Abandoned!

The main steps: clone the repo, install the dependencies, download the model, then run it.

  1. Clone Wan2.1 from GitHub: git clone https://github.com/Wan-Video/Wan2.1.git
  2. Enter the freshly cloned Wan2.1 directory and install the dependencies: pip install -r requirements.txt (pay attention to the flash-attention install); to use the Tsinghua mirror: pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
  3. Switch to the hf-mirror endpoint: export HF_ENDPOINT=https://hf-mirror.com; then download Wan-AI/Wan2.1-T2V-1.3B from hf-mirror: huggingface-cli download Wan-AI/Wan2.1-T2V-1.3B --local-dir ./Wan2.1-T2V-1.3B
  4. Test from the cloned Wan2.1 directory: python generate.py --task t2v-1.3B --size 832*480 --ckpt_dir ./Wan2.1-T2V-1.3B --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."

The 1.3B model only supports text-to-video (no image-to-video) and tops out at 480P. The 14B models support image-to-video at up to 720P.

The following is the official Wan2.1 deployment information.

Installation
Clone the repo:
git clone https://github.com/Wan-Video/Wan2.1.git
cd Wan2.1
Install dependencies:
# Ensure torch >= 2.4.0
pip install -r requirements.txt
Model Download
Models        | Download Link                  | Notes
T2V-14B       | 🤗 Huggingface / 🤖 ModelScope | Supports both 480P and 720P
I2V-14B-720P  | 🤗 Huggingface / 🤖 ModelScope | Supports 720P
I2V-14B-480P  | 🤗 Huggingface / 🤖 ModelScope | Supports 480P
T2V-1.3B      | 🤗 Huggingface / 🤖 ModelScope | Supports 480P
💡Note: The 1.3B model is capable of generating videos at 720P resolution. However, due to limited training at this resolution, the results are generally less stable compared to 480P. For optimal performance, we recommend using 480P resolution.
Download models using 🤗 huggingface-cli:
pip install "huggingface_hub[cli]" huggingface-cli download Wan-AI/Wan2.1-T2V-1.3B --local-dir ./Wan2.1-T2V-1.3B
Download models using 🤖 modelscope-cli:
pip install modelscope
modelscope download Wan-AI/Wan2.1-T2V-1.3B --local_dir ./Wan2.1-T2V-1.3B
Run Text-to-Video Generation
This repository supports two Text-to-Video models (1.3B and 14B) and two resolutions (480P and 720P). The parameters and configurations for these models are as follows:
Task      | 480P | 720P | Model
t2v-14B   | ✔️   | ✔️   | Wan2.1-T2V-14B
t2v-1.3B  | ✔️   |      | Wan2.1-T2V-1.3B
(1) Without Prompt Extension
To facilitate implementation, we will start with a basic version of the inference process that skips the prompt extension step.
Single-GPU inference
python generate.py --task t2v-1.3B --size 832*480 --ckpt_dir ./Wan2.1-T2V-1.3B --sample_shift 8 --sample_guide_scale 6 --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
If you encounter OOM (Out-of-Memory) issues, you can use the --offload_model True and --t5_cpu options to reduce GPU memory usage. For example, on an RTX 4090 GPU:
python generate.py --task t2v-1.3B --size 832*480 --ckpt_dir ./Wan2.1-T2V-1.3B --offload_model True --t5_cpu --sample_shift 8 --sample_guide_scale 6 --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."
💡Note: If you are using the T2V-1.3B model, we recommend setting the parameter --sample_guide_scale 6. The --sample_shift parameter can be adjusted within the range of 8 to 12 based on the performance.
Multi-GPU inference using FSDP + xDiT USP
pip install "xfuser>=0.4.1" torchrun --nproc_per_node=8 generate.py --task t2v-1.3B --size 832*480 --ckpt_dir ./Wan2.1-T2V-1.3B --dit_fsdp --t5_fsdp --ulysses_size 8 --sample_shift 8 --sample_guide_scale 6 --prompt "Two anthropomorphic cats in comfy boxing gear and bright gloves fight intensely on a spotlighted stage."