Skip to content

Commit d993734

Browse files
authored
Update age_load to load scalar property values with appropriate type (#1519) (#1525) (#1581)
* Update age_load to load scalar property values with appropriate type (#1519) (#1525) Previously, property values from csv files were always loaded as strings. This patch adds a boolean function parameter `load_as_agtype`. When this parameter is true, values are converted to an appropriate scalar type (i.e. string, bool, numeric, null) while loading. Otherwise, values are loaded as strings. It uses the agtype_value_from_cstring() function for conversion. Additional change(s): ------------------- - Fix: for csv rows in edge files, create_agtype_from_list_i()'s start_index is corrected to 4 Note: ---- - It applies both patches, 1519 and 1525. Conflicts: src/backend/utils/load/ag_load_labels.c src/backend/utils/load/age_load.c src/include/utils/load/ag_load_edges.h src/include/utils/load/age_load.h * Borrow json_validate() from PG16 The json_validate() function is borrowed from PG16 with minor changes, because it does not exist in other versions of PG. * Update json_validate() to support PG12 and below For PG12 and below, pg_parse_json() does not return an error code. A TRY-CATCH block is used to catch errors and return false instead.
1 parent 601550b commit d993734

File tree

14 files changed

+336
-26
lines changed

14 files changed

+336
-26
lines changed

age--1.5.0--y.y.y.sql

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,21 @@
2828
-- Please add all additions, deletions, and modifications to the end of this
2929
-- file. We need to keep the order of these changes.
3030

31+
DROP FUNCTION IF EXISTS ag_catalog.load_labels_from_file(name, name, text, bool);
32+
CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name,
33+
label_name name,
34+
file_path text,
35+
id_field_exists bool default true,
36+
load_as_agtype bool default false)
37+
RETURNS void
38+
LANGUAGE c
39+
AS 'MODULE_PATHNAME';
40+
41+
DROP FUNCTION IF EXISTS ag_catalog.load_edges_from_file(name, name, text);
42+
CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name,
43+
label_name name,
44+
file_path text,
45+
load_as_agtype bool default false)
46+
RETURNS void
47+
LANGUAGE c
48+
AS 'MODULE_PATHNAME';
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
start_id, start_vertex_type, end_id, end_vertex_type, string, bool, numeric,
2+
1, Person1, 1, Person2, "John Smith", "true", 1
3+
1, Person1, 1, Person2, "John", "false", "-2"
4+
1, Person1, 1, Person2, John Smith, true, 1.4
5+
1, Person1, 1, Person2, """John""", false, -1e10
6+
1, Person1, 1, Person2, null, false, 0
7+
1, Person1, 1, Person2, nUll, false, "3.14"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
id, string, bool, numeric,
2+
1, "John Smith", "true", 1
3+
2, "John", "false", "-2"
4+
3, John Smith, true, 1.4
5+
4, """John""", false, -1e10
6+
5, null, false, 0
7+
6, nUll, false, "3.14"

regress/expected/age_load.out

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,127 @@ NOTICE: graph "agload_test_graph" has been dropped
233233

234234
(1 row)
235235

236+
--
237+
-- Test property type conversion
238+
--
239+
SELECT create_graph('agload_conversion');
240+
NOTICE: graph "agload_conversion" has been created
241+
create_graph
242+
--------------
243+
244+
(1 row)
245+
246+
-- vertex: load as agtype
247+
SELECT create_vlabel('agload_conversion','Person1');
248+
NOTICE: VLabel "Person1" has been created
249+
create_vlabel
250+
---------------
251+
252+
(1 row)
253+
254+
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true);
255+
load_labels_from_file
256+
-----------------------
257+
258+
(1 row)
259+
260+
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype);
261+
a
262+
------------------------------------------------------------------------------------
263+
{"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1}
264+
{"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2}
265+
{"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4}
266+
{"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0}
267+
{"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0}
268+
{"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14}
269+
(6 rows)
270+
271+
-- vertex: load as string
272+
SELECT create_vlabel('agload_conversion','Person2');
273+
NOTICE: VLabel "Person2" has been created
274+
create_vlabel
275+
---------------
276+
277+
(1 row)
278+
279+
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false);
280+
load_labels_from_file
281+
-----------------------
282+
283+
(1 row)
284+
285+
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype);
286+
a
287+
-------------------------------------------------------------------------------------
288+
{"id": "1", "bool": "true", "__id__": 1, "string": "John Smith", "numeric": "1"}
289+
{"id": "2", "bool": "false", "__id__": 2, "string": "John", "numeric": "-2"}
290+
{"id": "3", "bool": "true", "__id__": 3, "string": "John Smith", "numeric": "1.4"}
291+
{"id": "4", "bool": "false", "__id__": 4, "string": "\"John\"", "numeric": "-1e10"}
292+
{"id": "5", "bool": "false", "__id__": 5, "string": "null", "numeric": "0"}
293+
{"id": "6", "bool": "false", "__id__": 6, "string": "nUll", "numeric": "3.14"}
294+
(6 rows)
295+
296+
-- edge: load as agtype
297+
SELECT create_elabel('agload_conversion','Edges1');
298+
NOTICE: ELabel "Edges1" has been created
299+
create_elabel
300+
---------------
301+
302+
(1 row)
303+
304+
SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true);
305+
load_edges_from_file
306+
----------------------
307+
308+
(1 row)
309+
310+
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype);
311+
a
312+
--------------------------------------------------------------
313+
{"bool": true, "string": "John Smith", "numeric": 1}
314+
{"bool": false, "string": "John", "numeric": -2}
315+
{"bool": true, "string": "John Smith", "numeric": 1.4}
316+
{"bool": false, "string": "John", "numeric": -10000000000.0}
317+
{"bool": false, "string": null, "numeric": 0}
318+
{"bool": false, "string": "nUll", "numeric": 3.14}
319+
(6 rows)
320+
321+
-- edge: load as string
322+
SELECT create_elabel('agload_conversion','Edges2');
323+
NOTICE: ELabel "Edges2" has been created
324+
create_elabel
325+
---------------
326+
327+
(1 row)
328+
329+
SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false);
330+
load_edges_from_file
331+
----------------------
332+
333+
(1 row)
334+
335+
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype);
336+
a
337+
-------------------------------------------------------------
338+
{"bool": "true", "string": "John Smith", "numeric": "1"}
339+
{"bool": "false", "string": "John", "numeric": "-2"}
340+
{"bool": "true", "string": "John Smith", "numeric": "1.4"}
341+
{"bool": "false", "string": "\"John\"", "numeric": "-1e10"}
342+
{"bool": "false", "string": "null", "numeric": "0"}
343+
{"bool": "false", "string": "nUll", "numeric": "3.14"}
344+
(6 rows)
345+
346+
SELECT drop_graph('agload_conversion', true);
347+
NOTICE: drop cascades to 6 other objects
348+
DETAIL: drop cascades to table agload_conversion._ag_label_vertex
349+
drop cascades to table agload_conversion._ag_label_edge
350+
drop cascades to table agload_conversion."Person1"
351+
drop cascades to table agload_conversion."Person2"
352+
drop cascades to table agload_conversion."Edges1"
353+
drop cascades to table agload_conversion."Edges2"
354+
NOTICE: graph "agload_conversion" has been dropped
355+
drop_graph
356+
------------
357+
358+
(1 row)
359+

regress/sql/age_load.sql

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,30 @@ SELECT * FROM cypher('agload_test_graph', $$
7979
$$) AS (result_1 agtype, result_2 agtype);
8080

8181
SELECT drop_graph('agload_test_graph', true);
82+
83+
--
84+
-- Test property type conversion
85+
--
86+
SELECT create_graph('agload_conversion');
87+
88+
-- vertex: load as agtype
89+
SELECT create_vlabel('agload_conversion','Person1');
90+
SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv', true, true);
91+
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person1) RETURN properties(n) $$) as (a agtype);
92+
93+
-- vertex: load as string
94+
SELECT create_vlabel('agload_conversion','Person2');
95+
SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv', true, false);
96+
SELECT * FROM cypher('agload_conversion', $$ MATCH (n:Person2) RETURN properties(n) $$) as (a agtype);
97+
98+
-- edge: load as agtype
99+
SELECT create_elabel('agload_conversion','Edges1');
100+
SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.csv', true);
101+
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges1]->() RETURN properties(e) $$) as (a agtype);
102+
103+
-- edge: load as string
104+
SELECT create_elabel('agload_conversion','Edges2');
105+
SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false);
106+
SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype);
107+
108+
SELECT drop_graph('agload_conversion', true);

sql/age_main.sql

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,23 @@ CREATE FUNCTION ag_catalog.drop_label(graph_name name, label_name name,
120120
LANGUAGE c
121121
AS 'MODULE_PATHNAME';
122122

123+
--
124+
-- If `load_as_agtype` is true, property values are loaded as agtype; otherwise
125+
-- they are loaded as strings.
126+
--
123127
CREATE FUNCTION ag_catalog.load_labels_from_file(graph_name name,
124128
label_name name,
125129
file_path text,
126-
id_field_exists bool default true)
130+
id_field_exists bool default true,
131+
load_as_agtype bool default false)
127132
RETURNS void
128133
LANGUAGE c
129134
AS 'MODULE_PATHNAME';
130135

131136
CREATE FUNCTION ag_catalog.load_edges_from_file(graph_name name,
132137
label_name name,
133-
file_path text)
138+
file_path text,
139+
load_as_agtype bool default false)
134140
RETURNS void
135141
LANGUAGE c
136142
AS 'MODULE_PATHNAME';

src/backend/utils/adt/agtype.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ typedef enum /* type categories for datum_to_agtype */
8888
} agt_type_category;
8989

9090
static inline Datum agtype_from_cstring(char *str, int len);
91-
static inline agtype_value *agtype_value_from_cstring(char *str, int len);
9291
size_t check_string_length(size_t len);
9392
static void agtype_in_agtype_annotation(void *pstate, char *annotation);
9493
static void agtype_in_object_start(void *pstate);
@@ -352,7 +351,7 @@ Datum agtype_out(PG_FUNCTION_ARGS)
352351
* Uses the agtype parser (with hooks) to construct an agtype.
353352
*/
354353

355-
static inline agtype_value *agtype_value_from_cstring(char *str, int len)
354+
agtype_value *agtype_value_from_cstring(char *str, int len)
356355
{
357356
agtype_lex_context *lex;
358357
agtype_in_state state;

src/backend/utils/load/ag_load_edges.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data)
105105
end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int);
106106

107107
props = create_agtype_from_list_i(cr->header, cr->fields,
108-
n_fields, 3);
108+
n_fields, 4, cr->load_as_agtype);
109109

110110
insert_edge_simple(cr->graph_id, cr->object_name,
111111
object_graph_id, start_vertex_graph_id,
@@ -158,7 +158,8 @@ int create_edges_from_csv_file(char *file_path,
158158
char *graph_name,
159159
Oid graph_id,
160160
char *object_name,
161-
int object_id )
161+
int object_id,
162+
bool load_as_agtype)
162163
{
163164

164165
FILE *fp;
@@ -195,6 +196,7 @@ int create_edges_from_csv_file(char *file_path,
195196
cr.graph_id = graph_id;
196197
cr.object_name = object_name;
197198
cr.object_id = object_id;
199+
cr.load_as_agtype = load_as_agtype;
198200

199201
while ((bytes_read=fread(buf, 1, 1024, fp)) > 0)
200202
{

src/backend/utils/load/ag_load_labels.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data)
9292
object_graph_id = make_graphid(cr->object_id, label_id_int);
9393

9494
props = create_agtype_from_list(cr->header, cr->fields,
95-
n_fields, label_id_int);
95+
n_fields, label_id_int,
96+
cr->load_as_agtype);
9697
insert_vertex_simple(cr->graph_id, cr->object_name,
9798
object_graph_id, props);
9899
pfree(props);
@@ -145,7 +146,8 @@ int create_labels_from_csv_file(char *file_path,
145146
Oid graph_id,
146147
char *object_name,
147148
int object_id,
148-
bool id_field_exists)
149+
bool id_field_exists,
150+
bool load_as_agtype)
149151
{
150152

151153
FILE *fp;
@@ -184,6 +186,7 @@ int create_labels_from_csv_file(char *file_path,
184186
cr.object_name = object_name;
185187
cr.object_id = object_id;
186188
cr.id_field_exists = id_field_exists;
189+
cr.load_as_agtype = load_as_agtype;
187190

188191

189192

0 commit comments

Comments
 (0)