|
|
|
@ -5,6 +5,7 @@ import cn.hutool.http.HttpResponse;
|
|
|
|
|
import cn.hutool.json.JSONUtil; |
|
|
|
|
import com.alibaba.fastjson.JSON; |
|
|
|
|
import com.alibaba.fastjson.JSONObject; |
|
|
|
|
import com.alibaba.fastjson.TypeReference; |
|
|
|
|
import com.alibaba.fastjson.serializer.SerializerFeature; |
|
|
|
|
import com.google.common.collect.Lists; |
|
|
|
|
import com.hnac.gglm.bigmodel.BigModelConstants; |
|
|
|
@ -79,53 +80,8 @@ public class WeaviateService {
|
|
|
|
|
return !result.hasErrors(); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// /**
|
|
|
|
|
// * 对象批量保存向量数据库
|
|
|
|
|
// * @param entities 保存对象列表
|
|
|
|
|
// * @param className 保存表名
|
|
|
|
|
// * @param attrsMap 待计算的列信息 key-向量名 value-实体类对象属性,多个按逗号分隔
|
|
|
|
|
// * @return 保存操作结果
|
|
|
|
|
// */
|
|
|
|
|
// public Boolean saveBatch(List entities,String className, Map<String,String> attrsMap) {
|
|
|
|
|
// entities = entities.subList(0, 1);
|
|
|
|
|
// ObjectCreator creator = weaviateClient.data().creator().withClassName(className);
|
|
|
|
|
// List<String> vectorStrs = Lists.newArrayList();
|
|
|
|
|
// List<String> attrs = Lists.newArrayList();
|
|
|
|
|
// if(Func.isNotEmpty(attrsMap)) {
|
|
|
|
|
// // 格式化数据
|
|
|
|
|
// attrsMap.forEach((k,v) -> attrs.add(v));
|
|
|
|
|
// // 解析待计算的向量字段
|
|
|
|
|
// entities.forEach(entity -> {
|
|
|
|
|
// List<String> vectorStr = attrs.stream().map(fields -> this.getFieldValue(fields, entity)).filter(Func::isNotEmpty).collect(Collectors.toList());
|
|
|
|
|
// vectorStrs.addAll(vectorStr);
|
|
|
|
|
// });
|
|
|
|
|
// }
|
|
|
|
|
// if(Func.isNotEmpty(vectorStrs)) {
|
|
|
|
|
// // 若解析出来的向量存在值
|
|
|
|
|
// Float[] vectors = this.compute(vectorStrs);
|
|
|
|
|
// List<Map<String, Float[]>> vector = this.splitVector(entities.size(), attrsMap, vectors);
|
|
|
|
|
// for(int i = 0; i < entities.size(); i++) {
|
|
|
|
|
// // log.info("vector:{}",JSON.toJSONString(vector.get(i)));
|
|
|
|
|
// Map<String, Object> properties = this.objectToMap(entities.get(i));
|
|
|
|
|
// log.info("properties:{}",JSON.toJSONString(properties));
|
|
|
|
|
// Result<WeaviateObject> run = creator.withProperties(properties).withVectors(vector.get(i)).run();
|
|
|
|
|
// if(run.hasErrors()) {
|
|
|
|
|
// log.error("保存失败!,保存结果为:{}",JSON.toJSONString(run));
|
|
|
|
|
// }
|
|
|
|
|
// }
|
|
|
|
|
// } else {
|
|
|
|
|
// entities.forEach(entity -> creator.withProperties(this.objectToMap(entity)).run());
|
|
|
|
|
// return true;
|
|
|
|
|
// }
|
|
|
|
|
// return false;
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
public Boolean saveBatch(List entities,String className, Map<String,String> attrsMap) { |
|
|
|
|
Optional modelId = entities.stream().filter(entity -> { |
|
|
|
|
JSONObject jsonObject = JSONObject.parseObject(JSON.toJSONString(entity)); |
|
|
|
|
return "1442295246932828161".equals(jsonObject.getString("modelId")); |
|
|
|
|
}).findFirst(); |
|
|
|
|
// 查询Weaviate 4.7.0表是否存在 若不存则新建表
|
|
|
|
|
public String saveBatch(List entities,String className, Map<String,String> attrsMap) { |
|
|
|
|
// Check whether the table exists; create it if it does not
|
|
|
|
|
Result<Boolean> existResult = weaviateClient.schema().exists().withClassName(BigModelConstants.PREFIX + "_" + className).run(); |
|
|
|
|
if(existResult.hasErrors() || !existResult.getResult()) { |
|
|
|
|
Map<java.lang.String,Object> createTableParams = new HashMap<>(2); |
|
|
|
@ -140,22 +96,48 @@ public class WeaviateService {
|
|
|
|
|
} |
|
|
|
|
Map<String,Object> params = new HashMap<>(2); |
|
|
|
|
params.put("table_name", className); |
|
|
|
|
// Split entities into batches of at most 1000 so each insert call stays bounded
|
|
|
|
|
List<List> entitiesList = splitList(entities, 1000); |
|
|
|
|
int total = 0; |
|
|
|
|
for (List entityList : entitiesList) { |
|
|
|
|
Integer insert = this.insert(entityList, className, attrsMap, params); |
|
|
|
|
total += insert; |
|
|
|
|
} |
|
|
|
|
// Report how many entities were submitted and how many the insert calls counted as saved
|
|
|
|
|
return String.format("传入数据总量为:%s 保存成功数量为:%s", entities.size(), total); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* 将list按size截断为多个list |
|
|
|
|
* @param list 待截断的list |
|
|
|
|
* @param size 截断大小 |
|
|
|
|
* @return |
|
|
|
|
* @param <T> |
|
|
|
|
*/ |
|
|
|
|
public static <T> List<List<T>> splitList(List<T> list, int size) { |
|
|
|
|
List<List<T>> parts = new ArrayList<>(); |
|
|
|
|
for (int i = 0; i < list.size(); i += size) { |
|
|
|
|
parts.add(list.subList(i, Math.min(list.size(), i + size))); |
|
|
|
|
} |
|
|
|
|
return parts; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
private Integer insert(List entities,String className, Map<String,String> attrsMap, Map<String,Object> params) { |
|
|
|
|
List<Map<String, Object>> data = new ArrayList<>(); |
|
|
|
|
entities.forEach(entity -> { |
|
|
|
|
// 将entity转换为Map<String,String>
|
|
|
|
|
JSONObject jsonObject = JSONObject.parseObject(JSON.toJSONString(entity, SerializerFeature.WriteMapNullValue)); |
|
|
|
|
Map<String,String> map = new HashMap<>(); |
|
|
|
|
jsonObject.forEach((k,v) -> map.put(k,Optional.ofNullable(jsonObject.getString(k)).orElse(""))); |
|
|
|
|
data.add(this.getVectorData(map,attrsMap)); |
|
|
|
|
jsonObject.forEach((k,v) -> map.put(k,jsonObject.getString(k))); |
|
|
|
|
}); |
|
|
|
|
log.info("data:{}",JSON.toJSONString(data)); |
|
|
|
|
params.put("data",data); |
|
|
|
|
String url = gglmUrl + invokeApi.getInsertVectors(); |
|
|
|
|
RequestClientUtil.postCall(url,params); |
|
|
|
|
return true; |
|
|
|
|
Map<String, Object> stringIntegerMap = RequestClientUtil.postCall(url, params, new TypeReference<Map<String, Object>>() { |
|
|
|
|
}); |
|
|
|
|
return (Integer) stringIntegerMap.get("total"); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private Map<String,Object> getVectorData(Map<String,String> entity,Map<String,String> attrsMap) { |
|
|
|
|
Map<String,Object> result = new HashMap<>(2); |
|
|
|
|
result.put("object", entity); |
|
|
|
|