# Source: FS-TFP/federatedscope/cv/dataset/preprocess/celeba_preprocess.py
# (file-viewer metadata from the original listing: 67 lines, 2.0 KiB, Python)

# ---------------------------------------------------------------------- #
# Preprocessing script that converts the raw all_data.json, whose samples are
# image file names, into several JSON files whose samples are the image pixels.
# To get the raw all_data.json, see:
# https://github.com/TalwalkarLab/leaf/tree/master/data/celeba
# ---------------------------------------------------------------------- #
import json
import math
import numpy as np
import os
import sys
import copy
from PIL import Image
from tqdm import tqdm
# Maximum number of users written into a single output JSON file.
MAX_USERS = 100
# Target image size in pixels (84 x 84) passed to thumbnail() and resize().
size = (84, 84)
def name2json(name):
    """Load one image and return its pixels as a flat, JSON-serializable list.

    The image is read from ``raw/img_align_celeba/<name>``, converted to RGB,
    shrunk with ``thumbnail`` and then resized to the module-level ``size``.

    Args:
        name: File name of the image inside ``raw/img_align_celeba``.

    Returns:
        A flat list of ints (``uint8`` values) in row-major
        (row, column, channel) order; its length is
        ``size[0] * size[1] * 3``.
    """
    file_path = os.path.join('raw', 'img_align_celeba', name)
    # Use a context manager so the file handle is released right away; this
    # function is called once per image of every user.
    with Image.open(file_path) as img:
        # convert() loads the pixel data and returns an independent image, so
        # it remains usable after the source file is closed.
        rgb = img.convert('RGB')
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter and exists in both old and new Pillow releases.
    rgb.thumbnail(size, Image.LANCZOS)
    # thumbnail() keeps the aspect ratio, so force the exact target size.
    rgb = rgb.resize(size)
    return np.asarray(rgb, dtype=np.uint8).flatten().tolist()
if __name__ == '__main__':
    # Step 1: load the raw file, in which every sample in 'x' is an image
    # file name, and replace each name with the flattened pixel values.
    file = 'all_data/all_data.json'
    with open(file, 'r') as f:
        raw_data = json.load(f)

    data = copy.deepcopy(raw_data)
    for user in tqdm(raw_data['user_data']):
        img_names = raw_data['user_data'][user]['x']
        data['user_data'][user]['x'] = [name2json(name) for name in img_names]

    # Step 2: save to several json files, at most MAX_USERS users per file.
    out_dir = 'new_all_data'
    # Create the output directory up front; otherwise the first open() below
    # fails with FileNotFoundError after all images were already converted.
    os.makedirs(out_dir, exist_ok=True)

    num_users = len(data['user_data'])
    cnt = 0
    file_id = 0
    all_data = {'users': [], 'num_samples': [], 'user_data': {}}
    for idx, user in enumerate(tqdm(data['user_data'])):
        # NOTE: indexing by position assumes data['users'] and
        # data['num_samples'] are ordered like the keys of data['user_data'].
        all_data['users'].append(data['users'][idx])
        all_data['num_samples'].append(data['num_samples'][idx])
        all_data['user_data'][user] = data['user_data'][user]
        cnt += 1
        # Flush when the current file is full or the last user was reached.
        if cnt == MAX_USERS or idx == num_users - 1:
            file_name = f'all_data_{file_id}.json'
            file_path = os.path.join(out_dir, file_name)
            with open(file_path, 'w') as outfile:
                json.dump(all_data, outfile)
            file_id += 1
            cnt = 0
            all_data = {'users': [], 'num_samples': [], 'user_data': {}}