|
@@ -1,3 +1,30 @@
|
|
|
# torchdata
|
|
|
|
|
|
-A PyTorch repo for data loading and utilities to be shared by the PyTorch domain libraries.
|
|
|
+A PyTorch repo for data loading and utilities to be shared by the PyTorch domain libraries.
|
|
|
+
|
|
|
+
|
|
|
+## 为什么开发torchdata?
|
|
|
+经过多年的反馈,我们发现 PyTorch 的 `DataLoader` 和 `Dataset` 存在以下问题:
|
|
|
+
|
|
|
+原版 `DataLoader` 将太多功能捆绑在一起,使它们难以扩展、操作或取代。这在社区中造成了特定于用例的变体的激增,而不是形成一个由可互操作元素组成的生态系统。torchdata 旨在节省 OSS 维护人员重写、调试和维护这些常用组件的时间和精力。
|
|
|
+
|
|
|
+```
|
|
|
+
|
|
|
+pip install torchdata
|
|
|
+```
|
|
|
+```python
|
|
|
+import json
|
|
|
+
|
|
|
+class JsonParserIterDataPipe(IterDataPipe):
|
|
|
+    def __init__(self, source_datapipe, **kwargs) -> None:
|
|
|
+        self.source_datapipe = source_datapipe
|
|
|
+        self.kwargs = kwargs
|
|
|
+
|
|
|
+    def __iter__(self):
|
|
|
+        for file_name, stream in self.source_datapipe:
|
|
|
+            data = stream.read()
|
|
|
+            yield file_name, json.loads(data, **self.kwargs)
|
|
|
+
|
|
|
+    def __len__(self):
|
|
|
+        return len(self.source_datapipe)
|
|
|
+```
|