Mercurial > ~darius > hgwebdir.cgi > wh1080
comparison db/insertwunderground.c @ 0:9dab44dcb331
Initial commit of Greg's code from http://www.lemis.com/grog/tmp/wh1080.tar.gz
author | Daniel O'Connor <darius@dons.net.au> |
---|---|
date | Tue, 09 Feb 2010 13:44:25 +1030 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9dab44dcb331 |
---|---|
1 /* | |
2 * Read in Wunderground comma-delimited files and convert them into INSERT | |
3 * commands for MySQL. | |
4 * | |
5 * $Id: insertwunderground.c,v 1.5 2009/12/09 03:27:55 grog Exp $ | |
6 */ | |
7 #include <ctype.h> | |
8 #include <stdio.h> | |
9 #include <stdlib.h> | |
10 #include <string.h> | |
11 #include <sys/file.h> | |
12 #include <sys/stat.h> | |
13 #include <sys/types.h> | |
14 #include <unistd.h> | |
15 | |
16 | |
17 #define BUFSIZE 1024 | |
18 char attribute_list [BUFSIZE]; | |
19 char buf [BUFSIZE]; | |
20 char sql_command [BUFSIZE]; | |
21 int attribute_count; | |
22 char *station; /* name of station */ | |
23 char *date; /* and date from second parameter, if present */ | |
24 | |
25 /* | |
26 * Convert the Wunderground field names to our table. This is confused by the | |
27 * fact that Wunderground can't make up its mind what to call the fields, so | |
28 * multiple names are possible, and they can have spaces in them. If we don't | |
29 * care about the field, we have a null translation. | |
30 * | |
31 * Some fields are problems: time is really two fields, date and time, and | |
32 * precipitation is sometimes in cm rather than mm. We special-case them. | |
33 */ | |
34 struct attributes | |
35 { | |
36 char *wundername; /* name supplied in file */ | |
37 char *column_name; /* and the names we use */ | |
38 } attributes [] = { | |
39 {"Clouds", "clouds"}, | |
40 {"Conditions", "conditions"}, | |
41 {"Dew PointC", "outside_dewpoint"}, | |
42 {"DewpointC", "outside_dewpoint"}, | |
43 {"Events", "events"}, | |
44 {"Gust SpeedKm/h", "wind_gust"}, | |
45 {"HourlyPrecipMM", "rain"}, | |
46 {"Humidity", "outside_humidity"}, | |
47 {"PrecipitationCm", ""}, | |
48 {"PressurehPa", "pressure_msl"}, /* I assume this is correct */ | |
49 {"Sea Level PressurehPa", "pressure_msl"}, | |
50 {"SoftwareType", ""}, | |
51 {"TemperatureC", "outside_temp"}, | |
52 {"Time", "date"}, | |
53 {"TimeEST", "time"}, | |
54 {"VisibilityKm", "visibility"}, | |
55 {"Wind Direction", "wind_direction_text"}, | |
56 {"Wind SpeedKm/h", "wind_speed"}, | |
57 {"WindDirection", "wind_direction_text"}, | |
58 {"WindDirectionDegrees", "wind_direction"}, | |
59 {"WindSpeedGustKMH", "wind_gust"}, | |
60 {"WindSpeedKMH", "wind_speed"}, | |
61 {"dailyrainCM", "" }}; | |
62 | |
63 int columns = sizeof (attributes) / sizeof (struct attributes); | |
64 | |
65 int field [50]; /* build up a table of the fields here */ | |
66 | |
67 void skipblanks (char **cp) | |
68 { | |
69 while (**cp == ' ') | |
70 (*cp)++; | |
71 } | |
72 | |
73 int main (int argc, char *argv []) | |
74 { | |
75 char *cp; /* pointers in buffers */ | |
76 char *ep; /* pointers in buffers */ | |
77 char *fp; | |
78 char fieldname [64]; | |
79 int fields = 0; /* becomes number of fields in table */ | |
80 int myfield; | |
81 int i; | |
82 | |
83 if (argc < 2) | |
84 { | |
85 fprintf (stderr, "Usage: $0 < file STATION_ID\n"); | |
86 exit (1); | |
87 } | |
88 station = argv [1]; /* arg is station name */ | |
89 if (argc > 2) /* date specified too */ | |
90 date = argv [2]; | |
91 | |
92 /* | |
93 * The first useful line should be a list of field names. Read it in and | |
94 * decide what we want to use. | |
95 */ | |
96 do | |
97 { | |
98 if (! fgets (attribute_list, BUFSIZE - 1, stdin)) | |
99 exit (0); /* empty file, just ignore */ | |
100 } | |
101 while (strlen (attribute_list) < 4); /* I'm not sure how long the | |
102 * initial junk is, but this | |
103 * should cover it */ | |
104 /* | |
105 * Untidy copy of field names. | |
106 */ | |
107 cp = attribute_list; | |
108 while (1) | |
109 { | |
110 fp = fieldname; | |
111 skipblanks (&cp); | |
112 while ((*cp != ',') && (*cp !='0') && (*cp !='<')) | |
113 *fp++ = *cp++; /* copy the character */ | |
114 *fp = '\0'; /* and delimit it */ | |
115 for (i = 0; i < columns; i++) | |
116 if (! strcmp (fieldname, attributes [i].wundername)) /* found it */ | |
117 { | |
118 field [fields] = i; | |
119 break; | |
120 } | |
121 if (i == columns) /* not found */ | |
122 { | |
123 fprintf (stderr, "Warning: can't find field name %s\n", fieldname); | |
124 field [fields] = -1; /* don't use this field */ | |
125 } | |
126 if (strlen (attributes [i].column_name) == 0) /* no column */ | |
127 field [fields] = -1; /* */ | |
128 if ((*cp == '<') || (*cp == '\0')) /* end of useful info */ | |
129 break; | |
130 cp++; /* skip the delimiter */ | |
131 fields++; /* done another field */ | |
132 } | |
133 /* | |
134 * On exit from the loop, fields contains the number of fields, and field [] | |
135 * is a translation table. | |
136 */ | |
137 while (1) | |
138 { | |
139 if (! fgets (buf, BUFSIZE - 1, stdin)) | |
140 exit (0); /* empty file, just ignore */ | |
141 if (memcmp (buf, "20", 2) == 0) /* we have a line starting with at date */ | |
142 { | |
143 char *time; | |
144 | |
145 /* walk through the date field and convert it to two fields */ | |
146 sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date, time"); | |
147 /* first get the names */ | |
148 for (myfield = 1; myfield < fields; myfield++) | |
149 { | |
150 if (field [myfield] >= 0) | |
151 sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name); | |
152 } | |
153 sprintf (&sql_command [strlen (sql_command)], ") VALUES ("); | |
154 /* and now the values */ | |
155 cp = buf; | |
156 while (*cp != ' ') | |
157 cp++; | |
158 *cp++ = '\0'; /* split first field */ | |
159 skipblanks (&cp); /* this shouldn't be necessary */ | |
160 time = cp; /* second is time */ | |
161 while (*cp != ',') | |
162 cp++; /* delimit */ | |
163 *cp++ = '\0'; /* delimit second field */ | |
164 /* | |
165 * We now have buf pointing to the date, time pointing to the time, both | |
166 * \0 delimited, and cp pointing to the rest of the buffer. | |
167 */ | |
168 sprintf (&sql_command [strlen (sql_command)], | |
169 "\"%s\", \"%s\", \"%s\"", | |
170 station, | |
171 buf, | |
172 time ); | |
173 /* | |
174 * Do the rest | |
175 */ | |
176 for (myfield = 1; myfield < fields; myfield++) | |
177 { | |
178 skipblanks (&cp); | |
179 ep = cp; | |
180 while ((*ep != ',') && (*ep != '\n')) | |
181 ep++; | |
182 *ep++ = '\0'; /* make a string of it */ | |
183 if (field [myfield] >= 0) | |
184 sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp); | |
185 cp = ep; /* point to next field */ | |
186 /* | |
187 * Strictly speaking this isn't necessary, since we have a counted | |
188 * number of fields, but I don't trust the data. | |
189 */ | |
190 if (*cp == '\0') | |
191 break; | |
192 } | |
193 sprintf (&sql_command [strlen (sql_command)], ");"); | |
194 puts (sql_command); | |
195 } | |
196 else if (isdigit (buf [0])) /* must be a time */ | |
197 { | |
198 sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date"); | |
199 | |
200 | |
201 /* first get the names */ | |
202 for (myfield = 0; myfield < fields; myfield++) | |
203 { | |
204 if (field [myfield] >= 0) | |
205 sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name); | |
206 } | |
207 sprintf (&sql_command [strlen (sql_command)], ") VALUES (\"%s\", \"%s\"", | |
208 station, | |
209 date ); | |
210 /* | |
211 * Do the rest | |
212 */ | |
213 cp = buf; | |
214 for (myfield = 0; myfield < fields; myfield++) | |
215 { | |
216 skipblanks (&cp); | |
217 ep = cp; | |
218 while ((*ep != ',') && (*ep != '\n')) | |
219 ep++; | |
220 *ep++ = '\0'; /* make a string of it */ | |
221 if (field [myfield] >= 0) | |
222 sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp); | |
223 cp = ep; /* point to next field */ | |
224 /* | |
225 * Strictly speaking this isn't necessary, since we have a counted | |
226 * number of fields, but I don't trust the data. | |
227 */ | |
228 if (*cp == '\0') | |
229 break; | |
230 } | |
231 sprintf (&sql_command [strlen (sql_command)], ");"); | |
232 puts (sql_command); | |
233 } | |
234 } | |
235 return 0; | |
236 } | |
237 |