comparison db/insertwunderground.c @ 0:9dab44dcb331

Initial commit of Greg's code from http://www.lemis.com/grog/tmp/wh1080.tar.gz
author Daniel O'Connor <darius@dons.net.au>
date Tue, 09 Feb 2010 13:44:25 +1030
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9dab44dcb331
1 /*
2 * Read in Wunderground comma-delimited files and convert them into INSERT
3 * commands for MySQL.
4 *
5 * $Id: insertwunderground.c,v 1.5 2009/12/09 03:27:55 grog Exp $
6 */
7 #include <ctype.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/file.h>
12 #include <sys/stat.h>
13 #include <sys/types.h>
14 #include <unistd.h>
15
16
17 #define BUFSIZE 1024
18 char attribute_list [BUFSIZE];
19 char buf [BUFSIZE];
20 char sql_command [BUFSIZE];
21 int attribute_count;
22 char *station; /* name of station */
23 char *date; /* and date from second parameter, if present */
24
25 /*
26 * Convert the Wunderground field names to our table. This is confused by the
27 * fact that Wunderground can't make up its mind what to call the fields, so
28 * multiple names are possible, and they can have spaces in them. If we don't
29 * care about the field, we have a null translation.
30 *
31 * Some fields are problems: time is really two fields, date and time, and
32 * precipitation is sometimes in cm rather than mm. We special-case them.
33 */
34 struct attributes
35 {
36 char *wundername; /* name supplied in file */
37 char *column_name; /* and the names we use */
38 } attributes [] = {
39 {"Clouds", "clouds"},
40 {"Conditions", "conditions"},
41 {"Dew PointC", "outside_dewpoint"},
42 {"DewpointC", "outside_dewpoint"},
43 {"Events", "events"},
44 {"Gust SpeedKm/h", "wind_gust"},
45 {"HourlyPrecipMM", "rain"},
46 {"Humidity", "outside_humidity"},
47 {"PrecipitationCm", ""},
48 {"PressurehPa", "pressure_msl"}, /* I assume this is correct */
49 {"Sea Level PressurehPa", "pressure_msl"},
50 {"SoftwareType", ""},
51 {"TemperatureC", "outside_temp"},
52 {"Time", "date"},
53 {"TimeEST", "time"},
54 {"VisibilityKm", "visibility"},
55 {"Wind Direction", "wind_direction_text"},
56 {"Wind SpeedKm/h", "wind_speed"},
57 {"WindDirection", "wind_direction_text"},
58 {"WindDirectionDegrees", "wind_direction"},
59 {"WindSpeedGustKMH", "wind_gust"},
60 {"WindSpeedKMH", "wind_speed"},
61 {"dailyrainCM", "" }};
62
63 int columns = sizeof (attributes) / sizeof (struct attributes);
64
65 int field [50]; /* build up a table of the fields here */
66
67 void skipblanks (char **cp)
68 {
69 while (**cp == ' ')
70 (*cp)++;
71 }
72
73 int main (int argc, char *argv [])
74 {
75 char *cp; /* pointers in buffers */
76 char *ep; /* pointers in buffers */
77 char *fp;
78 char fieldname [64];
79 int fields = 0; /* becomes number of fields in table */
80 int myfield;
81 int i;
82
83 if (argc < 2)
84 {
85 fprintf (stderr, "Usage: $0 < file STATION_ID\n");
86 exit (1);
87 }
88 station = argv [1]; /* arg is station name */
89 if (argc > 2) /* date specified too */
90 date = argv [2];
91
92 /*
93 * The first useful line should be a list of field names. Read it in and
94 * decide what we want to use.
95 */
96 do
97 {
98 if (! fgets (attribute_list, BUFSIZE - 1, stdin))
99 exit (0); /* empty file, just ignore */
100 }
101 while (strlen (attribute_list) < 4); /* I'm not sure how long the
102 * initial junk is, but this
103 * should cover it */
104 /*
105 * Untidy copy of field names.
106 */
107 cp = attribute_list;
108 while (1)
109 {
110 fp = fieldname;
111 skipblanks (&cp);
112 while ((*cp != ',') && (*cp !='0') && (*cp !='<'))
113 *fp++ = *cp++; /* copy the character */
114 *fp = '\0'; /* and delimit it */
115 for (i = 0; i < columns; i++)
116 if (! strcmp (fieldname, attributes [i].wundername)) /* found it */
117 {
118 field [fields] = i;
119 break;
120 }
121 if (i == columns) /* not found */
122 {
123 fprintf (stderr, "Warning: can't find field name %s\n", fieldname);
124 field [fields] = -1; /* don't use this field */
125 }
126 if (strlen (attributes [i].column_name) == 0) /* no column */
127 field [fields] = -1; /* */
128 if ((*cp == '<') || (*cp == '\0')) /* end of useful info */
129 break;
130 cp++; /* skip the delimiter */
131 fields++; /* done another field */
132 }
133 /*
134 * On exit from the loop, fields contains the number of fields, and field []
135 * is a translation table.
136 */
137 while (1)
138 {
139 if (! fgets (buf, BUFSIZE - 1, stdin))
140 exit (0); /* empty file, just ignore */
141 if (memcmp (buf, "20", 2) == 0) /* we have a line starting with at date */
142 {
143 char *time;
144
145 /* walk through the date field and convert it to two fields */
146 sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date, time");
147 /* first get the names */
148 for (myfield = 1; myfield < fields; myfield++)
149 {
150 if (field [myfield] >= 0)
151 sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name);
152 }
153 sprintf (&sql_command [strlen (sql_command)], ") VALUES (");
154 /* and now the values */
155 cp = buf;
156 while (*cp != ' ')
157 cp++;
158 *cp++ = '\0'; /* split first field */
159 skipblanks (&cp); /* this shouldn't be necessary */
160 time = cp; /* second is time */
161 while (*cp != ',')
162 cp++; /* delimit */
163 *cp++ = '\0'; /* delimit second field */
164 /*
165 * We now have buf pointing to the date, time pointing to the time, both
166 * \0 delimited, and cp pointing to the rest of the buffer.
167 */
168 sprintf (&sql_command [strlen (sql_command)],
169 "\"%s\", \"%s\", \"%s\"",
170 station,
171 buf,
172 time );
173 /*
174 * Do the rest
175 */
176 for (myfield = 1; myfield < fields; myfield++)
177 {
178 skipblanks (&cp);
179 ep = cp;
180 while ((*ep != ',') && (*ep != '\n'))
181 ep++;
182 *ep++ = '\0'; /* make a string of it */
183 if (field [myfield] >= 0)
184 sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp);
185 cp = ep; /* point to next field */
186 /*
187 * Strictly speaking this isn't necessary, since we have a counted
188 * number of fields, but I don't trust the data.
189 */
190 if (*cp == '\0')
191 break;
192 }
193 sprintf (&sql_command [strlen (sql_command)], ");");
194 puts (sql_command);
195 }
196 else if (isdigit (buf [0])) /* must be a time */
197 {
198 sprintf (sql_command, "REPLACE INTO remote_observations (station_id, date");
199
200
201 /* first get the names */
202 for (myfield = 0; myfield < fields; myfield++)
203 {
204 if (field [myfield] >= 0)
205 sprintf (&sql_command [strlen (sql_command)], ", %s", attributes [field [myfield]].column_name);
206 }
207 sprintf (&sql_command [strlen (sql_command)], ") VALUES (\"%s\", \"%s\"",
208 station,
209 date );
210 /*
211 * Do the rest
212 */
213 cp = buf;
214 for (myfield = 0; myfield < fields; myfield++)
215 {
216 skipblanks (&cp);
217 ep = cp;
218 while ((*ep != ',') && (*ep != '\n'))
219 ep++;
220 *ep++ = '\0'; /* make a string of it */
221 if (field [myfield] >= 0)
222 sprintf (&sql_command [strlen (sql_command)], ", \"%s\"", cp);
223 cp = ep; /* point to next field */
224 /*
225 * Strictly speaking this isn't necessary, since we have a counted
226 * number of fields, but I don't trust the data.
227 */
228 if (*cp == '\0')
229 break;
230 }
231 sprintf (&sql_command [strlen (sql_command)], ");");
232 puts (sql_command);
233 }
234 }
235 return 0;
236 }
237