67 lines
2.0 KiB
Python
67 lines
2.0 KiB
Python
# ---------------------------------------------------------------------- #
|
|
# Preprocessing script: replaces the image file names listed in all_data.json
# with the pixel data of each image and writes the result as JSON files.
|
|
# To get raw all_data.json, see:
|
|
# https://github.com/TalwalkarLab/leaf/tree/master/data/celeba
|
|
# ---------------------------------------------------------------------- #
|
|
|
|
import json
|
|
import math
|
|
import numpy as np
|
|
import os
|
|
import sys
|
|
import copy
|
|
from PIL import Image
|
|
|
|
from tqdm import tqdm
|
|
|
|
# Maximum number of users stored in each output JSON file.
MAX_USERS = 100
|
|
# Target size handed to PIL's thumbnail()/resize() for every image.
size = (84, 84)
|
|
|
|
|
|
def name2json(name):
    """Load one image and return its pixels as a flat list of ints.

    The image ``raw/img_align_celeba/<name>`` is converted to RGB, shrunk
    with ``thumbnail`` (which keeps the aspect ratio) and then resized to
    exactly ``size``.

    Args:
        name: File name of the image inside ``raw/img_align_celeba``.

    Returns:
        A flat list of ``uint8`` pixel values (as Python ints) of length
        ``size[0] * size[1] * 3``.
    """
    file_path = os.path.join('raw', 'img_align_celeba', name)

    # Open through a context manager so the underlying file handle is
    # closed promptly; convert() loads the pixels and returns a new image
    # that stays valid after the source file is closed.
    with Image.open(file_path) as img:
        rgb = img.convert('RGB')

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    rgb.thumbnail(size, Image.LANCZOS)
    rgb = rgb.resize(size)

    return np.asarray(rgb, dtype=np.uint8).flatten().tolist()
|
|
|
|
|
|
def _empty_shard():
    """Return a fresh, empty container for one output file."""
    return {'users': [], 'num_samples': [], 'user_data': {}}


def _write_shard(shard, file_id):
    """Dump one shard to ``new_all_data/all_data_<file_id>.json``."""
    file_name = f'all_data_{file_id}.json'
    file_path = os.path.join('new_all_data', file_name)
    with open(file_path, 'w') as outfile:
        json.dump(shard, outfile)


def main():
    """Convert image names in all_data.json to pixel data, in shards.

    Reads ``all_data/all_data.json``, replaces every image name under
    ``user_data[user]['x']`` with the flat pixel list from ``name2json``,
    and writes the users out in groups of at most ``MAX_USERS`` per file
    to ``new_all_data/all_data_<n>.json``.
    """
    file = 'all_data/all_data.json'

    with open(file, 'r') as f:
        raw_data = json.load(f)

    # Create the output directory up front so a missing folder does not
    # abort the run after the expensive image conversion.
    os.makedirs('new_all_data', exist_ok=True)

    # Save to several json files. Users are converted and flushed one shard
    # at a time so only MAX_USERS users' pixel data is held in memory.
    file_id = 0
    shard = _empty_shard()

    # NOTE(review): assumes 'users' and 'num_samples' are listed in the same
    # order as the keys of 'user_data' -- confirm against the data producer.
    for idx, user in enumerate(tqdm(raw_data['user_data'])):
        # Shallow copy keeps every other per-user field untouched while
        # the 'x' entry is swapped for the pixel data.
        entry = dict(raw_data['user_data'][user])
        entry['x'] = [name2json(name) for name in entry['x']]

        shard['users'].append(raw_data['users'][idx])
        shard['num_samples'].append(raw_data['num_samples'][idx])
        shard['user_data'][user] = entry

        if len(shard['users']) == MAX_USERS:
            _write_shard(shard, file_id)
            file_id += 1
            shard = _empty_shard()

    # Flush the final, partially filled shard (if any).
    if shard['users']:
        _write_shard(shard, file_id)


if __name__ == '__main__':
    main()
|